sql_cli/
non_interactive.rs

1use anyhow::{Context, Result};
2use std::fs;
3use std::io::{self, Write};
4use std::path::Path;
5use std::time::Instant;
6use tracing::{debug, info};
7
8use crate::config::config::Config;
9use crate::data::data_view::DataView;
10use crate::data::datatable::{DataTable, DataValue};
11use crate::data::datatable_loaders::{load_csv_to_datatable, load_json_to_datatable};
12use crate::services::query_execution_service::QueryExecutionService;
13use crate::sql::script_parser::{ScriptParser, ScriptResult};
14
15/// Output format for query results
16#[derive(Debug, Clone)]
17pub enum OutputFormat {
18    Csv,
19    Json,
20    Table,
21    Tsv,
22}
23
24impl OutputFormat {
25    pub fn from_str(s: &str) -> Result<Self> {
26        match s.to_lowercase().as_str() {
27            "csv" => Ok(OutputFormat::Csv),
28            "json" => Ok(OutputFormat::Json),
29            "table" => Ok(OutputFormat::Table),
30            "tsv" => Ok(OutputFormat::Tsv),
31            _ => Err(anyhow::anyhow!(
32                "Invalid output format: {}. Use csv, json, table, or tsv",
33                s
34            )),
35        }
36    }
37}
38
39/// Configuration for non-interactive query execution
40pub struct NonInteractiveConfig {
41    pub data_file: String,
42    pub query: String,
43    pub output_format: OutputFormat,
44    pub output_file: Option<String>,
45    pub case_insensitive: bool,
46    pub auto_hide_empty: bool,
47    pub limit: Option<usize>,
48    pub query_plan: bool,
49    pub execution_plan: bool,
50    pub script_file: Option<String>, // Path to the script file for relative path resolution
51}
52
53/// Execute a query in non-interactive mode
54pub fn execute_non_interactive(config: NonInteractiveConfig) -> Result<()> {
55    let start_time = Instant::now();
56
57    // Check if query uses DUAL or has no FROM clause
58    use crate::sql::recursive_parser::{Parser, SelectStatement};
59
60    fn check_statement_for_range(stmt: &SelectStatement) -> bool {
61        // Check main query
62        if stmt.from_function.is_some() {
63            return true;
64        }
65
66        // Check if it's DUAL or no FROM
67        if stmt
68            .from_table
69            .as_ref()
70            .is_some_and(|t| t.to_uppercase() == "DUAL")
71        {
72            return true;
73        }
74
75        if stmt.from_table.is_none() && stmt.from_subquery.is_none() && stmt.from_function.is_none()
76        {
77            return true;
78        }
79
80        // Recursively check CTEs
81        for cte in &stmt.ctes {
82            if check_statement_for_range(&cte.query) {
83                return true;
84            }
85        }
86
87        // Check subqueries
88        if let Some(ref subquery) = stmt.from_subquery {
89            if check_statement_for_range(subquery) {
90                return true;
91            }
92        }
93
94        false
95    }
96
97    let mut parser = Parser::new(&config.query);
98    let statement = parser
99        .parse()
100        .map_err(|e| anyhow::anyhow!("Parse error: {}", e))?;
101
102    // 1. Load the data file or create DUAL table
103    let (data_table, _is_dual) =
104        if check_statement_for_range(&statement) || config.data_file.is_empty() {
105            info!("Using DUAL table for expression evaluation");
106            (crate::data::datatable::DataTable::dual(), true)
107        } else {
108            info!("Loading data from: {}", config.data_file);
109            let table = load_data_file(&config.data_file)?;
110            info!(
111                "Loaded {} rows with {} columns",
112                table.row_count(),
113                table.column_count()
114            );
115            (table, false)
116        };
117    let _table_name = data_table.name.clone();
118
119    // 2. Create a DataView from the table
120    let dataview = DataView::new(std::sync::Arc::new(data_table));
121
122    // 3. Execute the query
123    info!("Executing query: {}", config.query);
124
125    // If execution_plan is requested, show detailed execution information
126    if config.execution_plan {
127        println!("\n=== EXECUTION PLAN ===");
128        println!("Query: {}", config.query);
129        println!("\nExecution Steps:");
130        println!("1. PARSE - Parse SQL query");
131        println!("2. LOAD_DATA - Load data from {}", &config.data_file);
132        println!(
133            "   • Loaded {} rows, {} columns",
134            dataview.row_count(),
135            dataview.column_count()
136        );
137    }
138
139    // If query_plan is requested, parse and display the AST
140    if config.query_plan {
141        use crate::sql::recursive_parser::Parser;
142        let mut parser = Parser::new(&config.query);
143        match parser.parse() {
144            Ok(statement) => {
145                println!("\n=== QUERY PLAN (AST) ===");
146                println!("{statement:#?}");
147                println!("=== END QUERY PLAN ===\n");
148            }
149            Err(e) => {
150                eprintln!("Failed to parse query for plan: {e}");
151            }
152        }
153    }
154
155    let query_start = Instant::now();
156
157    // Load configuration file to get date notation and other settings
158    let app_config = Config::load().unwrap_or_else(|e| {
159        debug!("Could not load config file: {}. Using defaults.", e);
160        Config::default()
161    });
162
163    // Initialize global config for function registry
164    crate::config::global::init_config(app_config.clone());
165
166    // Use QueryExecutionService with full BehaviorConfig
167    let mut behavior_config = app_config.behavior.clone();
168    debug!(
169        "Using date notation: {}",
170        behavior_config.default_date_notation
171    );
172    // Command line args override config file settings
173    if config.case_insensitive {
174        behavior_config.case_insensitive_default = true;
175    }
176    if config.auto_hide_empty {
177        behavior_config.hide_empty_columns = true;
178    }
179
180    let query_service = QueryExecutionService::with_behavior_config(behavior_config);
181
182    let exec_start = Instant::now();
183    let result = query_service.execute(&config.query, Some(&dataview), Some(dataview.source()))?;
184    let exec_time = exec_start.elapsed();
185
186    let query_time = query_start.elapsed();
187    info!("Query executed in {:?}", query_time);
188    info!(
189        "Result: {} rows, {} columns",
190        result.dataview.row_count(),
191        result.dataview.column_count()
192    );
193
194    // Show execution plan details if requested
195    if config.execution_plan {
196        // Try to get detailed execution plan
197        use crate::data::query_engine::QueryEngine;
198
199        let query_engine = QueryEngine::new();
200
201        match query_engine.execute_with_plan(
202            std::sync::Arc::new(dataview.source().clone()),
203            &config.query,
204        ) {
205            Ok((_view, plan)) => {
206                // Display the detailed execution plan tree
207                print!("{}", plan.format_tree());
208            }
209            Err(e) => {
210                // Fall back to simple execution plan display
211                eprintln!("Could not generate detailed execution plan: {}", e);
212                println!(
213                    "3. QUERY_EXECUTION [{:.3}ms]",
214                    exec_time.as_secs_f64() * 1000.0
215                );
216
217                // Parse query to understand what operations are being performed
218                use crate::sql::recursive_parser::Parser;
219                let mut parser = Parser::new(&config.query);
220                if let Ok(stmt) = parser.parse() {
221                    if stmt.where_clause.is_some() {
222                        println!("   • WHERE clause filtering applied");
223                        println!("   • Rows after filter: {}", result.dataview.row_count());
224                    }
225
226                    if let Some(ref order_by) = stmt.order_by {
227                        println!("   • ORDER BY: {} column(s)", order_by.len());
228                    }
229
230                    if let Some(ref group_by) = stmt.group_by {
231                        println!("   • GROUP BY: {} column(s)", group_by.len());
232                    }
233
234                    if let Some(limit) = stmt.limit {
235                        println!("   • LIMIT: {} rows", limit);
236                    }
237
238                    if stmt.distinct {
239                        println!("   • DISTINCT applied");
240                    }
241                }
242            }
243        }
244
245        println!("\nExecution Statistics:");
246        println!(
247            "  Preparation:    {:.3}ms",
248            (exec_start - start_time).as_secs_f64() * 1000.0
249        );
250        println!(
251            "  Query time:     {:.3}ms",
252            exec_time.as_secs_f64() * 1000.0
253        );
254        println!(
255            "  Total time:     {:.3}ms",
256            query_time.as_secs_f64() * 1000.0
257        );
258        println!("  Rows returned:  {}", result.dataview.row_count());
259        println!("  Columns:        {}", result.dataview.column_count());
260        println!("\n=== END EXECUTION PLAN ===");
261        println!();
262    }
263
264    // 4. Apply limit if specified
265    let final_view = if let Some(limit) = config.limit {
266        let limited_table = limit_results(&result.dataview, limit)?;
267        DataView::new(std::sync::Arc::new(limited_table))
268    } else {
269        result.dataview
270    };
271
272    // 5. Output the results
273    let output_result = if let Some(ref path) = config.output_file {
274        let mut file = fs::File::create(path)
275            .with_context(|| format!("Failed to create output file: {path}"))?;
276        output_results(&final_view, config.output_format, &mut file)?;
277        info!("Results written to: {}", path);
278        Ok(())
279    } else {
280        output_results(&final_view, config.output_format, &mut io::stdout())?;
281        Ok(())
282    };
283
284    let total_time = start_time.elapsed();
285    debug!("Total execution time: {:?}", total_time);
286
287    // Print stats to stderr so they don't interfere with output
288    if config.output_file.is_none() {
289        eprintln!(
290            "\n# Query completed: {} rows in {:?}",
291            final_view.row_count(),
292            query_time
293        );
294    }
295
296    output_result
297}
298
299/// Execute a script file with multiple SQL statements separated by GO
300pub fn execute_script(config: NonInteractiveConfig) -> Result<()> {
301    let _start_time = Instant::now();
302
303    // Parse the script into individual statements
304    let parser = ScriptParser::new(&config.query);
305    let statements = parser.parse_and_validate()?;
306
307    info!("Found {} statements in script", statements.len());
308
309    // Determine data file to use (command-line overrides script hint)
310    let data_file = if !config.data_file.is_empty() {
311        // Command-line argument takes precedence
312        config.data_file.clone()
313    } else if let Some(hint) = parser.data_file_hint() {
314        // Use data file hint from script
315        info!("Using data file from script hint: {}", hint);
316
317        // Resolve relative paths relative to script file if provided
318        if let Some(script_path) = config.script_file.as_ref() {
319            let script_dir = std::path::Path::new(script_path)
320                .parent()
321                .unwrap_or(std::path::Path::new("."));
322            let hint_path = std::path::Path::new(hint);
323
324            if hint_path.is_relative() {
325                script_dir.join(hint_path).to_string_lossy().to_string()
326            } else {
327                hint.to_string()
328            }
329        } else {
330            hint.to_string()
331        }
332    } else {
333        String::new()
334    };
335
336    // Check if script needs a data file
337    let needs_data_file = if data_file.is_empty() {
338        // Parse statements to check if they use DUAL or RANGE
339        use crate::sql::recursive_parser::Parser;
340
341        let mut needs_file = false;
342        for statement_sql in &statements {
343            let mut parser = Parser::new(statement_sql);
344            match parser.parse() {
345                Ok(stmt) => {
346                    // Check if statement uses DUAL, RANGE, or has no FROM clause
347                    let uses_dual = stmt
348                        .from_table
349                        .as_ref()
350                        .map_or(false, |t| t.eq_ignore_ascii_case("dual"));
351                    let uses_range = stmt.from_function.is_some();
352                    let no_from = stmt.from_table.is_none()
353                        && stmt.from_subquery.is_none()
354                        && stmt.from_function.is_none();
355
356                    // Check CTEs for RANGE usage
357                    let cte_has_range = stmt.ctes.iter().any(|cte| {
358                        cte.query.from_function.is_some()
359                            || cte
360                                .query
361                                .from_table
362                                .as_ref()
363                                .map_or(false, |t| t.eq_ignore_ascii_case("dual"))
364                    });
365
366                    if !uses_dual && !uses_range && !no_from && !cte_has_range {
367                        needs_file = true;
368                        break;
369                    }
370                }
371                Err(_) => {
372                    // If we can't parse, assume it needs a data file
373                    needs_file = true;
374                    break;
375                }
376            }
377        }
378        needs_file
379    } else {
380        // Data file was specified, so we'll use it
381        false
382    };
383
384    // Load the data file once (or use DUAL)
385    let (data_table, _is_dual) = if data_file.is_empty() {
386        if needs_data_file {
387            anyhow::bail!(
388                "Script requires a data file. Either:\n\
389                1. Provide a data file: sql-cli data.csv -f script.sql\n\
390                2. Add a data hint to your script: -- #!data: path/to/data.csv"
391            );
392        } else {
393            // Script doesn't need a data file, use DUAL
394            info!("Using DUAL table for script execution");
395            (DataTable::dual(), true)
396        }
397    } else {
398        // Check if file exists before trying to load
399        if !std::path::Path::new(&data_file).exists() {
400            anyhow::bail!(
401                "Data file not found: {}\n\
402                Please check the path is correct",
403                data_file
404            );
405        }
406
407        info!("Loading data from: {}", data_file);
408        let table = load_data_file(&data_file)?;
409        info!(
410            "Loaded {} rows with {} columns",
411            table.row_count(),
412            table.column_count()
413        );
414        (table, false)
415    };
416
417    // Track script results
418    let mut script_result = ScriptResult::new();
419    let mut output = Vec::new();
420
421    // Create Arc<DataTable> once for all statements - avoids expensive cloning
422    let arc_data_table = std::sync::Arc::new(data_table);
423
424    // Execute each statement
425    for (idx, statement) in statements.iter().enumerate() {
426        let statement_num = idx + 1;
427        let stmt_start = Instant::now();
428
429        // Print separator for table format
430        if matches!(config.output_format, OutputFormat::Table) {
431            if idx > 0 {
432                output.push(String::new()); // Empty line between queries
433            }
434            output.push(format!("-- Query {} --", statement_num));
435        }
436
437        // Create a fresh DataView for each statement (reuses the Arc)
438        let dataview = DataView::new(arc_data_table.clone());
439
440        // Execute the statement
441        let service = QueryExecutionService::new(config.case_insensitive, config.auto_hide_empty);
442        match service.execute(statement, Some(&dataview), None) {
443            Ok(result) => {
444                let exec_time = stmt_start.elapsed().as_secs_f64() * 1000.0;
445                let final_view = result.dataview;
446
447                // Format the output based on the output format
448                let mut statement_output = Vec::new();
449                match config.output_format {
450                    OutputFormat::Csv => {
451                        output_csv(&final_view, &mut statement_output, ',')?;
452                    }
453                    OutputFormat::Json => {
454                        output_json(&final_view, &mut statement_output)?;
455                    }
456                    OutputFormat::Table => {
457                        output_table(&final_view, &mut statement_output)?;
458                        writeln!(
459                            &mut statement_output,
460                            "Query completed: {} rows in {:.2}ms",
461                            final_view.row_count(),
462                            exec_time
463                        )?;
464                    }
465                    OutputFormat::Tsv => {
466                        output_csv(&final_view, &mut statement_output, '\t')?;
467                    }
468                }
469
470                // Add to overall output
471                output.extend(
472                    String::from_utf8_lossy(&statement_output)
473                        .lines()
474                        .map(String::from),
475                );
476
477                script_result.add_success(
478                    statement_num,
479                    statement.clone(),
480                    final_view.row_count(),
481                    exec_time,
482                );
483            }
484            Err(e) => {
485                let exec_time = stmt_start.elapsed().as_secs_f64() * 1000.0;
486                let error_msg = format!("Query {} failed: {}", statement_num, e);
487
488                if matches!(config.output_format, OutputFormat::Table) {
489                    output.push(error_msg.clone());
490                }
491
492                script_result.add_failure(
493                    statement_num,
494                    statement.clone(),
495                    e.to_string(),
496                    exec_time,
497                );
498
499                // Continue to next statement (don't stop on error)
500            }
501        }
502    }
503
504    // Write output
505    if let Some(ref output_file) = config.output_file {
506        let mut file = fs::File::create(output_file)?;
507        for line in &output {
508            writeln!(file, "{}", line)?;
509        }
510        info!("Results written to: {}", output_file);
511    } else {
512        for line in &output {
513            println!("{}", line);
514        }
515    }
516
517    // Print summary if in table mode
518    if matches!(config.output_format, OutputFormat::Table) {
519        println!("\n=== Script Summary ===");
520        println!("Total statements: {}", script_result.total_statements);
521        println!("Successful: {}", script_result.successful_statements);
522        println!("Failed: {}", script_result.failed_statements);
523        println!(
524            "Total execution time: {:.2}ms",
525            script_result.total_execution_time_ms
526        );
527    }
528
529    if !script_result.all_successful() {
530        return Err(anyhow::anyhow!(
531            "{} of {} statements failed",
532            script_result.failed_statements,
533            script_result.total_statements
534        ));
535    }
536
537    Ok(())
538}
539
540/// Load a data file (CSV or JSON) into a `DataTable`
541fn load_data_file(path: &str) -> Result<DataTable> {
542    let path = Path::new(path);
543
544    if !path.exists() {
545        return Err(anyhow::anyhow!("File not found: {}", path.display()));
546    }
547
548    // Determine file type by extension
549    let extension = path
550        .extension()
551        .and_then(|ext| ext.to_str())
552        .map(str::to_lowercase)
553        .unwrap_or_default();
554
555    let table_name = path
556        .file_stem()
557        .and_then(|stem| stem.to_str())
558        .unwrap_or("data")
559        .to_string();
560
561    match extension.as_str() {
562        "csv" => load_csv_to_datatable(path, &table_name)
563            .with_context(|| format!("Failed to load CSV file: {}", path.display())),
564        "json" => load_json_to_datatable(path, &table_name)
565            .with_context(|| format!("Failed to load JSON file: {}", path.display())),
566        _ => Err(anyhow::anyhow!(
567            "Unsupported file type: {}. Use .csv or .json",
568            extension
569        )),
570    }
571}
572
573/// Limit the number of rows in results
574fn limit_results(dataview: &DataView, limit: usize) -> Result<DataTable> {
575    let source = dataview.source();
576    let mut limited_table = DataTable::new(&source.name);
577
578    // Copy columns
579    for col in &source.columns {
580        limited_table.add_column(col.clone());
581    }
582
583    // Copy limited rows
584    let rows_to_copy = dataview.row_count().min(limit);
585    for i in 0..rows_to_copy {
586        if let Some(row) = dataview.get_row(i) {
587            limited_table.add_row(row.clone());
588        }
589    }
590
591    Ok(limited_table)
592}
593
594/// Output query results in the specified format
595fn output_results<W: Write>(
596    dataview: &DataView,
597    format: OutputFormat,
598    writer: &mut W,
599) -> Result<()> {
600    match format {
601        OutputFormat::Csv => output_csv(dataview, writer, ','),
602        OutputFormat::Tsv => output_csv(dataview, writer, '\t'),
603        OutputFormat::Json => output_json(dataview, writer),
604        OutputFormat::Table => output_table(dataview, writer),
605    }
606}
607
608/// Output results as CSV/TSV
609fn output_csv<W: Write>(dataview: &DataView, writer: &mut W, delimiter: char) -> Result<()> {
610    // Write headers
611    let columns = dataview.column_names();
612    for (i, col) in columns.iter().enumerate() {
613        if i > 0 {
614            write!(writer, "{delimiter}")?;
615        }
616        write!(writer, "{}", escape_csv_field(col, delimiter))?;
617    }
618    writeln!(writer)?;
619
620    // Write rows
621    for row_idx in 0..dataview.row_count() {
622        if let Some(row) = dataview.get_row(row_idx) {
623            for (i, value) in row.values.iter().enumerate() {
624                if i > 0 {
625                    write!(writer, "{delimiter}")?;
626                }
627                write!(
628                    writer,
629                    "{}",
630                    escape_csv_field(&format_value(value), delimiter)
631                )?;
632            }
633            writeln!(writer)?;
634        }
635    }
636
637    Ok(())
638}
639
640/// Output results as JSON
641fn output_json<W: Write>(dataview: &DataView, writer: &mut W) -> Result<()> {
642    let columns = dataview.column_names();
643    let mut rows = Vec::new();
644
645    for row_idx in 0..dataview.row_count() {
646        if let Some(row) = dataview.get_row(row_idx) {
647            let mut json_row = serde_json::Map::new();
648            for (col_idx, value) in row.values.iter().enumerate() {
649                if col_idx < columns.len() {
650                    json_row.insert(columns[col_idx].clone(), value_to_json(value));
651                }
652            }
653            rows.push(serde_json::Value::Object(json_row));
654        }
655    }
656
657    let json = serde_json::to_string_pretty(&rows)?;
658    writeln!(writer, "{json}")?;
659
660    Ok(())
661}
662
663/// Output results as an ASCII table
664fn output_table<W: Write>(dataview: &DataView, writer: &mut W) -> Result<()> {
665    let columns = dataview.column_names();
666
667    // Calculate column widths
668    let mut widths = vec![0; columns.len()];
669    for (i, col) in columns.iter().enumerate() {
670        widths[i] = col.len();
671    }
672
673    // Check first 100 rows for width calculation
674    let sample_size = dataview.row_count().min(100);
675    for row_idx in 0..sample_size {
676        if let Some(row) = dataview.get_row(row_idx) {
677            for (i, value) in row.values.iter().enumerate() {
678                if i < widths.len() {
679                    let value_str = format_value(value);
680                    widths[i] = widths[i].max(value_str.len());
681                }
682            }
683        }
684    }
685
686    // Limit column widths to 50 characters
687    for width in &mut widths {
688        *width = (*width).min(50);
689    }
690
691    // Print header separator
692    write!(writer, "+")?;
693    for width in &widths {
694        write!(writer, "-{}-+", "-".repeat(*width))?;
695    }
696    writeln!(writer)?;
697
698    // Print headers
699    write!(writer, "|")?;
700    for (i, col) in columns.iter().enumerate() {
701        write!(writer, " {:^width$} |", col, width = widths[i])?;
702    }
703    writeln!(writer)?;
704
705    // Print header separator
706    write!(writer, "+")?;
707    for width in &widths {
708        write!(writer, "-{}-+", "-".repeat(*width))?;
709    }
710    writeln!(writer)?;
711
712    // Print rows
713    for row_idx in 0..dataview.row_count() {
714        if let Some(row) = dataview.get_row(row_idx) {
715            write!(writer, "|")?;
716            for (i, value) in row.values.iter().enumerate() {
717                if i < widths.len() {
718                    let value_str = format_value(value);
719                    let truncated = if value_str.len() > widths[i] {
720                        format!("{}...", &value_str[..widths[i] - 3])
721                    } else {
722                        value_str
723                    };
724                    write!(writer, " {:<width$} |", truncated, width = widths[i])?;
725                }
726            }
727            writeln!(writer)?;
728        }
729    }
730
731    // Print bottom separator
732    write!(writer, "+")?;
733    for width in &widths {
734        write!(writer, "-{}-+", "-".repeat(*width))?;
735    }
736    writeln!(writer)?;
737
738    Ok(())
739}
740
741/// Format a `DataValue` for display
742fn format_value(value: &DataValue) -> String {
743    match value {
744        DataValue::Null => String::new(),
745        DataValue::Integer(i) => i.to_string(),
746        DataValue::Float(f) => f.to_string(),
747        DataValue::String(s) => s.clone(),
748        DataValue::InternedString(s) => s.to_string(),
749        DataValue::Boolean(b) => b.to_string(),
750        DataValue::DateTime(dt) => dt.to_string(),
751    }
752}
753
754/// Convert `DataValue` to JSON
755fn value_to_json(value: &DataValue) -> serde_json::Value {
756    match value {
757        DataValue::Null => serde_json::Value::Null,
758        DataValue::Integer(i) => serde_json::Value::Number((*i).into()),
759        DataValue::Float(f) => {
760            if let Some(n) = serde_json::Number::from_f64(*f) {
761                serde_json::Value::Number(n)
762            } else {
763                serde_json::Value::Null
764            }
765        }
766        DataValue::String(s) => serde_json::Value::String(s.clone()),
767        DataValue::InternedString(s) => serde_json::Value::String(s.to_string()),
768        DataValue::Boolean(b) => serde_json::Value::Bool(*b),
769        DataValue::DateTime(dt) => serde_json::Value::String(dt.to_string()),
770    }
771}
772
773/// Escape a CSV field if it contains special characters
774fn escape_csv_field(field: &str, delimiter: char) -> String {
775    if field.contains(delimiter)
776        || field.contains('"')
777        || field.contains('\n')
778        || field.contains('\r')
779    {
780        format!("\"{}\"", field.replace('"', "\"\""))
781    } else {
782        field.to_string()
783    }
784}