sql-cli 1.71.3

SQL query tool for CSV/JSON with both interactive TUI and non-interactive CLI modes - perfect for exploration and automation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
//! Core statement executor - unified execution path for all modes
//!
//! This module provides the single source of truth for executing SQL statements.
//! Both script mode and single query mode should use this executor to ensure
//! consistent behavior and eliminate code duplication.

use anyhow::Result;
use std::sync::Arc;
use std::time::Instant;

use crate::data::data_view::DataView;
use crate::data::datatable::DataTable;
use crate::data::query_engine::QueryEngine;
use crate::query_plan::{create_pipeline_with_config, IntoClauseRemover};
use crate::sql::parser::ast::SelectStatement;

use super::config::ExecutionConfig;
use super::context::ExecutionContext;

/// Result of executing a SQL statement
#[derive(Debug)]
pub struct ExecutionResult {
    /// The resulting DataView from query execution
    pub dataview: DataView,

    /// Execution statistics
    pub stats: ExecutionStats,

    /// The transformed AST (after preprocessing)
    pub transformed_ast: Option<SelectStatement>,
}

/// Statistics about query execution
#[derive(Debug, Clone)]
pub struct ExecutionStats {
    /// Time spent preprocessing (transforming AST)
    pub preprocessing_time_ms: f64,

    /// Time spent executing the query
    pub execution_time_ms: f64,

    /// Total time (preprocessing + execution)
    pub total_time_ms: f64,

    /// Number of rows in result
    pub row_count: usize,

    /// Number of columns in result
    pub column_count: usize,

    /// Whether preprocessing was applied
    pub preprocessing_applied: bool,
}

impl ExecutionStats {
    fn new() -> Self {
        Self {
            preprocessing_time_ms: 0.0,
            execution_time_ms: 0.0,
            total_time_ms: 0.0,
            row_count: 0,
            column_count: 0,
            preprocessing_applied: false,
        }
    }
}

/// Core statement executor
///
/// This is the unified execution engine used by both script mode and single query mode.
/// It ensures consistent behavior by:
/// 1. Parsing SQL exactly once
/// 2. Applying preprocessing pipeline exactly once (if needed)
/// 3. Executing the AST directly (no re-parsing)
/// 4. Managing temp tables and context properly
pub struct StatementExecutor {
    config: ExecutionConfig,
}

impl StatementExecutor {
    /// Create a new statement executor with default configuration
    pub fn new() -> Self {
        Self {
            config: ExecutionConfig::default(),
        }
    }

    /// Create executor with custom configuration
    pub fn with_config(config: ExecutionConfig) -> Self {
        Self { config }
    }

    /// Execute a single SQL statement that has already been parsed
    ///
    /// # Arguments
    /// * `stmt` - The parsed SQL statement (AST)
    /// * `context` - Execution context (temp tables, variables, etc.)
    ///
    /// # Returns
    /// ExecutionResult containing the DataView and statistics
    ///
    /// # Example
    /// ```ignore
    /// let executor = StatementExecutor::new();
    /// let mut context = ExecutionContext::new(source_table);
    /// let stmt = Parser::new("SELECT * FROM test").parse()?;
    /// let result = executor.execute(&stmt, &mut context)?;
    /// ```
    pub fn execute(
        &self,
        stmt: SelectStatement,
        context: &mut ExecutionContext,
    ) -> Result<ExecutionResult> {
        let total_start = Instant::now();
        let mut stats = ExecutionStats::new();

        // Step 0: Check if this statement has an INTO clause (before preprocessing removes it!)
        let into_table_name = stmt.into_table.as_ref().map(|it| it.name.clone());

        // Step 1: Determine source table
        // NOTE: This is determined BEFORE preprocessing, so any CTEs added by
        // transformers (like ExpressionLifter) won't be visible yet.
        // For queries that will have CTEs added during preprocessing, we extract
        // the base table from the inner query structure.
        let source_table = if let Some(ref from_source) = stmt.from_source {
            match from_source {
                crate::sql::parser::ast::TableSource::Table(table_name) => {
                    // Could be a regular table or will become a CTE reference after preprocessing
                    context.resolve_table(table_name)
                }
                crate::sql::parser::ast::TableSource::DerivedTable { query, .. } => {
                    // For derived tables, recursively find the base table
                    Self::extract_base_table(&**query, context)
                }
                crate::sql::parser::ast::TableSource::Pivot { source, .. } => {
                    // For PIVOT (though it should be expanded), extract from source
                    Self::extract_base_table_from_source(source, context, &stmt)
                }
            }
        } else {
            // Fallback to deprecated field for backward compatibility
            #[allow(deprecated)]
            if let Some(ref from_table) = stmt.from_table {
                context.resolve_table(from_table)
            } else {
                // No FROM clause - use DUAL table for expression evaluation
                Arc::new(DataTable::dual())
            }
        };

        // Step 2: Apply preprocessing pipeline (if applicable)
        let preprocess_start = Instant::now();
        let (transformed_stmt, preprocessing_applied) = self.apply_preprocessing(stmt)?;
        stats.preprocessing_time_ms = preprocess_start.elapsed().as_secs_f64() * 1000.0;
        stats.preprocessing_applied = preprocessing_applied;

        // Step 2.5: Re-check source table if preprocessing added CTEs
        // If the transformed statement has CTEs (e.g., from ExpressionLifter),
        // we need to use the source table that was passed in, not the extracted base table,
        // because the CTE itself becomes the data source.
        let final_source_table = if !transformed_stmt.ctes.is_empty() {
            // Has CTEs - need to extract base table from the actual CTE definitions
            // to get the underlying data source
            Self::extract_base_table(&transformed_stmt, context)
        } else {
            source_table
        };

        // Step 3: Execute the transformed statement directly via QueryEngine
        let exec_start = Instant::now();
        let result_view =
            self.execute_ast(transformed_stmt.clone(), final_source_table, context)?;
        stats.execution_time_ms = exec_start.elapsed().as_secs_f64() * 1000.0;

        // Step 4: If this was a SELECT INTO statement, store the result as a temp table
        if let Some(table_name) = into_table_name {
            // Materialize the view into a DataTable using QueryEngine's method
            let engine = QueryEngine::with_case_insensitive(self.config.case_insensitive);
            let temp_table = engine.materialize_view(result_view.clone())?;

            // Store in temp table registry
            context.store_temp_table(table_name.clone(), Arc::new(temp_table))?;
            tracing::debug!("Stored temp table: {}", table_name);
        }

        // Step 5: Collect statistics
        stats.total_time_ms = total_start.elapsed().as_secs_f64() * 1000.0;
        stats.row_count = result_view.row_count();
        stats.column_count = result_view.column_count();

        Ok(ExecutionResult {
            dataview: result_view,
            stats,
            transformed_ast: Some(transformed_stmt),
        })
    }

    /// Apply preprocessing pipeline to a statement
    ///
    /// Returns (transformed_statement, preprocessing_applied)
    fn apply_preprocessing(&self, mut stmt: SelectStatement) -> Result<(SelectStatement, bool)> {
        // Check if statement has a FROM clause - only preprocess if it does
        // (queries without FROM have special semantics in this tool)
        let has_from_clause = if stmt.from_source.is_some() {
            true
        } else {
            // Fallback to deprecated fields
            #[allow(deprecated)]
            {
                stmt.from_table.is_some()
                    || stmt.from_subquery.is_some()
                    || stmt.from_function.is_some()
            }
        };

        if !has_from_clause {
            // No preprocessing for queries without FROM
            return Ok((stmt, false));
        }

        // Create preprocessing pipeline with configured transformers
        let mut pipeline = create_pipeline_with_config(
            self.config.show_preprocessing,
            self.config.show_sql_transformations,
            self.config.transformer_config.clone(),
        );

        // Apply transformations
        match pipeline.process(stmt.clone()) {
            Ok(transformed) => {
                // Remove INTO clause if present (executor doesn't handle INTO syntax)
                let final_stmt = if transformed.into_table.is_some() {
                    IntoClauseRemover::remove_into_clause(transformed)
                } else {
                    transformed
                };

                Ok((final_stmt, true))
            }
            Err(e) => {
                // If preprocessing fails, fall back to original statement
                tracing::debug!("Preprocessing failed: {}, using original statement", e);

                // Still remove INTO clause even on fallback
                let fallback = if stmt.into_table.is_some() {
                    IntoClauseRemover::remove_into_clause(stmt)
                } else {
                    stmt
                };

                Ok((fallback, false))
            }
        }
    }

    /// Execute an AST directly using QueryEngine
    ///
    /// This is the core execution method - it takes a parsed/transformed AST
    /// and executes it directly without re-parsing.
    fn execute_ast(
        &self,
        stmt: SelectStatement,
        source_table: Arc<DataTable>,
        context: &ExecutionContext,
    ) -> Result<DataView> {
        // Create QueryEngine with case sensitivity setting
        let engine = QueryEngine::with_case_insensitive(self.config.case_insensitive);

        // Execute the statement with temp table support
        // This is the key method that does the actual work
        engine.execute_statement_with_temp_tables(source_table, stmt, Some(&context.temp_tables))
    }

    /// Get the current configuration
    pub fn config(&self) -> &ExecutionConfig {
        &self.config
    }

    /// Update configuration
    pub fn set_config(&mut self, config: ExecutionConfig) {
        self.config = config;
    }

    /// Extract the base table from a SelectStatement
    /// Recursively traverses derived tables and CTEs to find the underlying table
    fn extract_base_table(stmt: &SelectStatement, context: &ExecutionContext) -> Arc<DataTable> {
        // If the query has CTEs, we need to find the base table by looking into them
        if !stmt.ctes.is_empty() {
            // Find the ultimate base table by traversing CTEs
            return Self::extract_base_table_from_ctes(stmt, context);
        }

        if let Some(ref from_source) = stmt.from_source {
            Self::extract_base_table_from_source(from_source, context, stmt)
        } else {
            // Fallback to deprecated fields
            #[allow(deprecated)]
            if let Some(ref from_table) = stmt.from_table {
                context.resolve_table(from_table)
            } else {
                Arc::new(DataTable::dual())
            }
        }
    }

    /// Extract base table when the statement has CTEs
    fn extract_base_table_from_ctes(
        stmt: &SelectStatement,
        context: &ExecutionContext,
    ) -> Arc<DataTable> {
        use crate::sql::parser::ast::CTEType;

        // Start from the main query's FROM and recursively find the base table
        if let Some(ref from_source) = stmt.from_source {
            match from_source {
                crate::sql::parser::ast::TableSource::Table(table_name) => {
                    // Check if this references a CTE
                    for cte in &stmt.ctes {
                        if &cte.name == table_name {
                            // Found the CTE - extract from its query
                            if let CTEType::Standard(cte_query) = &cte.cte_type {
                                return Self::extract_base_table(cte_query, context);
                            }
                        }
                    }
                    // Not a CTE - resolve as regular table
                    context.resolve_table(table_name)
                }
                crate::sql::parser::ast::TableSource::DerivedTable { query, .. } => {
                    Self::extract_base_table(&**query, context)
                }
                crate::sql::parser::ast::TableSource::Pivot { source, .. } => {
                    Self::extract_base_table_from_source(&**source, context, stmt)
                }
            }
        } else {
            Arc::new(DataTable::dual())
        }
    }

    /// Extract base table from a TableSource
    fn extract_base_table_from_source(
        source: &crate::sql::parser::ast::TableSource,
        context: &ExecutionContext,
        stmt: &SelectStatement,
    ) -> Arc<DataTable> {
        match source {
            crate::sql::parser::ast::TableSource::Table(table_name) => {
                // Check if this is a CTE reference
                for cte in &stmt.ctes {
                    if &cte.name == table_name {
                        // This is a CTE - extract from it
                        use crate::sql::parser::ast::CTEType;
                        if let CTEType::Standard(cte_query) = &cte.cte_type {
                            return Self::extract_base_table(cte_query, context);
                        }
                    }
                }
                // Not a CTE - resolve as regular table
                context.resolve_table(table_name)
            }
            crate::sql::parser::ast::TableSource::DerivedTable { query, .. } => {
                // Recursively extract from nested derived table
                Self::extract_base_table(&**query, context)
            }
            crate::sql::parser::ast::TableSource::Pivot { source, .. } => {
                // Extract from PIVOT source
                Self::extract_base_table_from_source(&**source, context, stmt)
            }
        }
    }
}

impl Default for StatementExecutor {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::datatable::{DataColumn, DataRow, DataType, DataValue};
    use crate::sql::recursive_parser::Parser;

    fn create_test_table(name: &str, rows: usize) -> DataTable {
        let mut table = DataTable::new(name);
        table.add_column(DataColumn::new("id").with_type(DataType::Integer));
        table.add_column(DataColumn::new("name").with_type(DataType::String));

        for i in 0..rows {
            let _ = table.add_row(DataRow {
                values: vec![
                    DataValue::Integer(i as i64),
                    DataValue::String(format!("name_{}", i)),
                ],
            });
        }

        table
    }

    #[test]
    fn test_new_executor() {
        let executor = StatementExecutor::new();
        assert!(!executor.config().case_insensitive);
        assert!(!executor.config().show_preprocessing);
    }

    #[test]
    fn test_executor_with_config() {
        let config = ExecutionConfig::new()
            .with_case_insensitive(true)
            .with_show_preprocessing(true);

        let executor = StatementExecutor::with_config(config);
        assert!(executor.config().case_insensitive);
        assert!(executor.config().show_preprocessing);
    }

    #[test]
    fn test_execute_simple_select() {
        let table = create_test_table("test", 10);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        // Parse and execute a simple SELECT
        let mut parser = Parser::new("SELECT id, name FROM test WHERE id < 5");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        assert_eq!(result.dataview.row_count(), 5);
        assert_eq!(result.dataview.column_count(), 2);
        assert!(result.stats.total_time_ms >= 0.0);
    }

    #[test]
    fn test_execute_select_star() {
        let table = create_test_table("test", 5);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        let mut parser = Parser::new("SELECT * FROM test");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        assert_eq!(result.dataview.row_count(), 5);
        assert_eq!(result.dataview.column_count(), 2);
    }

    #[test]
    fn test_execute_with_dual() {
        let table = create_test_table("test", 5);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        // Query without FROM - should use DUAL
        let mut parser = Parser::new("SELECT 1+1 as result");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        assert_eq!(result.dataview.row_count(), 1);
        assert_eq!(result.dataview.column_count(), 1);
    }

    #[test]
    fn test_execute_with_temp_table() {
        let base_table = create_test_table("base", 10);
        let mut context = ExecutionContext::new(Arc::new(base_table));
        let executor = StatementExecutor::new();

        // Create and store a temp table
        let temp_table = create_test_table("#temp", 3);
        context
            .store_temp_table("#temp".to_string(), Arc::new(temp_table))
            .unwrap();

        // Query the temp table
        let mut parser = Parser::new("SELECT * FROM #temp");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        assert_eq!(result.dataview.row_count(), 3);
    }

    #[test]
    fn test_preprocessing_applied_with_from() {
        let table = create_test_table("test", 10);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        // Query with FROM - preprocessing should be attempted
        let mut parser = Parser::new("SELECT id FROM test WHERE id > 0");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        // Preprocessing should have been attempted (may or may not transform anything)
        assert!(result.stats.preprocessing_time_ms >= 0.0);
    }

    #[test]
    fn test_no_preprocessing_without_from() {
        let table = create_test_table("test", 10);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        // Query without FROM - no preprocessing
        let mut parser = Parser::new("SELECT 42 as answer");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        // No preprocessing should have been applied
        assert!(!result.stats.preprocessing_applied);
    }

    #[test]
    fn test_execution_stats() {
        let table = create_test_table("test", 100);
        let mut context = ExecutionContext::new(Arc::new(table));
        let executor = StatementExecutor::new();

        let mut parser = Parser::new("SELECT * FROM test WHERE id < 50");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context).unwrap();

        let stats = result.stats;
        assert_eq!(stats.row_count, 50);
        assert_eq!(stats.column_count, 2);
        assert!(stats.total_time_ms >= 0.0);
        assert!(stats.total_time_ms >= stats.preprocessing_time_ms);
        assert!(stats.total_time_ms >= stats.execution_time_ms);
    }

    #[test]
    fn test_case_insensitive_execution() {
        let table = create_test_table("test", 10);
        let mut context = ExecutionContext::new(Arc::new(table));

        let config = ExecutionConfig::new().with_case_insensitive(true);
        let executor = StatementExecutor::with_config(config);

        // Use uppercase column name - should work with case insensitive
        let mut parser = Parser::new("SELECT ID FROM test");
        let stmt = parser.parse().unwrap();

        let result = executor.execute(stmt, &mut context);

        // Should succeed with case insensitive mode
        assert!(result.is_ok());
    }
}