#![allow(unused_variables)]
use crate::Result;
use crate::storage::StorageEngine;
use super::logical_plan::LogicalPlan;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExplainMode {
Standard,
Verbose,
AI,
Analyze,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExplainFormat {
Text,
JSON,
YAML,
Tree,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExplainOutput {
pub plan: PlanNode,
pub total_cost: f64,
pub total_rows: usize,
pub planning_time_ms: f64,
pub config: ConfigSnapshot,
pub features: Vec<ActiveFeature>,
pub decisions: Vec<OptimizerDecision>,
pub why_not: Option<WhyNotAnalysis>,
pub ai_explanation: Option<AIExplanation>,
pub warnings: Vec<String>,
pub suggestions: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub actual_rows: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub actual_time_ms: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub execution_error: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlanNode {
pub node_type: String,
pub operation: String,
pub cost: f64,
pub rows: usize,
pub details: HashMap<String, String>,
pub children: Vec<PlanNode>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigSnapshot {
pub work_mem_mb: usize,
pub enable_hashjoin: bool,
pub enable_mergejoin: bool,
pub enable_nestloop: bool,
pub enable_indexscan: bool,
pub enable_seqscan: bool,
pub max_parallel_workers: usize,
pub enable_simd: bool,
}
impl Default for ConfigSnapshot {
fn default() -> Self {
Self {
work_mem_mb: 256,
enable_hashjoin: true,
enable_mergejoin: true,
enable_nestloop: true,
enable_indexscan: true,
enable_seqscan: true,
max_parallel_workers: 4,
enable_simd: true,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActiveFeature {
pub name: String,
pub category: FeatureCategory,
pub trigger: String,
pub benefit: String,
pub savings_percent: Option<f64>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum FeatureCategory {
Pushdown, Pruning, Indexing, Vectorization, Parallelism, Caching, }
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizerDecision {
pub decision_point: String,
pub chosen: ChosenOption,
pub rejected: Vec<RejectedOption>,
pub reasoning: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChosenOption {
pub name: String,
pub cost: f64,
pub rows: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RejectedOption {
pub name: String,
pub cost: f64,
pub reason: String,
pub cost_multiplier: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WhyNotAnalysis {
pub unused_indexes: Vec<UnusedIndexReason>,
pub stale_statistics: Vec<StaleStatsWarning>,
pub cardinality_issues: Vec<CardinalityIssue>,
pub configuration_issues: Vec<ConfigIssue>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnusedIndexReason {
pub index_name: String,
pub table_name: String,
pub reason: String,
pub cost_impact: f64,
pub suggestion: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaleStatsWarning {
pub table_name: String,
pub days_old: u32,
pub percent_changed: f64,
pub impact: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CardinalityIssue {
pub operation: String,
pub estimated: usize,
pub actual: Option<usize>,
pub error_percent: f64,
pub cause: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigIssue {
pub parameter: String,
pub current_value: String,
pub suggested_value: String,
pub reason: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AIExplanation {
pub summary: String,
pub walkthrough: Vec<String>,
pub performance: PerformancePrediction,
pub suggestions: Vec<String>,
pub warnings: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformancePrediction {
pub category: String, pub estimated_time_ms: f64,
pub bottlenecks: Vec<String>,
pub explanation: String,
}
pub struct ExplainPlanner {
mode: ExplainMode,
format: ExplainFormat,
use_ai: bool,
llm_endpoint: Option<String>,
storage: Option<std::sync::Arc<StorageEngine>>,
}
impl ExplainPlanner {
pub fn new(mode: ExplainMode, format: ExplainFormat) -> Self {
Self {
mode,
format,
use_ai: matches!(mode, ExplainMode::AI | ExplainMode::Analyze),
llm_endpoint: None,
storage: None,
}
}
pub fn with_llm_endpoint(mut self, endpoint: String) -> Self {
self.llm_endpoint = Some(endpoint);
self
}
pub fn with_storage(mut self, storage: std::sync::Arc<StorageEngine>) -> Self {
self.storage = Some(storage);
self
}
pub fn explain(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
let start = std::time::Instant::now();
let plan_node = self.plan_to_node(plan)?;
let total_cost = self.calculate_total_cost(&plan_node);
let total_rows = self.estimate_total_rows(&plan_node);
let config = ConfigSnapshot::default();
let features = self.detect_features(plan);
let decisions = self.track_decisions(plan);
let why_not = if matches!(self.mode, ExplainMode::Analyze) {
Some(self.analyze_why_not(plan, &plan_node))
} else {
None
};
let ai_explanation = if self.use_ai {
Some(self.generate_ai_explanation(plan, &plan_node, total_cost, total_rows)?)
} else {
None
};
let (warnings, suggestions) = self.generate_warnings_and_suggestions(plan, &plan_node);
let planning_time_ms = start.elapsed().as_secs_f64() * 1000.0;
Ok(ExplainOutput {
plan: plan_node,
total_cost,
total_rows,
planning_time_ms,
config,
features,
decisions,
why_not,
ai_explanation,
warnings,
suggestions,
actual_rows: None,
actual_time_ms: None,
execution_error: None,
})
}
fn plan_to_node(&self, plan: &LogicalPlan) -> Result<PlanNode> {
match plan {
LogicalPlan::Scan { table_name, schema, projection, .. } => {
let mut details = HashMap::new();
details.insert("table".to_string(), table_name.clone());
details.insert("columns".to_string(), format!("{}", schema.columns.len()));
if let Some(proj) = projection {
details.insert("projection".to_string(), format!("{:?}", proj));
}
let estimated_rows = self.get_table_row_count(table_name);
let avg_row_size = self.get_table_avg_row_size(table_name);
let cost_per_byte = 0.01;
let scan_cost = estimated_rows as f64 * avg_row_size as f64 * cost_per_byte;
Ok(PlanNode {
node_type: "Scan".to_string(),
operation: format!("Scan {}", table_name),
cost: scan_cost.max(1.0), rows: estimated_rows,
details,
children: vec![],
})
}
LogicalPlan::Filter { input, predicate } => {
let input_node = self.plan_to_node(input)?;
let selectivity = self.estimate_predicate_selectivity(predicate);
let mut details = HashMap::new();
details.insert("predicate".to_string(), format!("{:?}", predicate));
details.insert("selectivity".to_string(), format!("{:.2}", selectivity));
Ok(PlanNode {
node_type: "Filter".to_string(),
operation: "Filter".to_string(),
cost: input_node.cost + (input_node.rows as f64 * 0.01),
rows: (input_node.rows as f64 * selectivity) as usize,
details,
children: vec![input_node],
})
}
LogicalPlan::Project { input, exprs, aliases, distinct, distinct_on } => {
let input_node = self.plan_to_node(input)?;
let mut details = HashMap::new();
details.insert("expressions".to_string(), format!("{}", exprs.len()));
details.insert("distinct".to_string(), distinct.to_string());
if let Some(on_exprs) = distinct_on {
details.insert("distinct_on".to_string(), format!("{} columns", on_exprs.len()));
}
let cost = input_node.cost + (input_node.rows as f64 * exprs.len() as f64 * 0.01);
let rows = if *distinct {
input_node.rows / 2 } else {
input_node.rows
};
Ok(PlanNode {
node_type: "Project".to_string(),
operation: "Project".to_string(),
cost,
rows,
details,
children: vec![input_node],
})
}
LogicalPlan::Aggregate { input, group_by, aggr_exprs, having } => {
let input_node = self.plan_to_node(input)?;
let mut details = HashMap::new();
details.insert("group_by".to_string(), format!("{}", group_by.len()));
details.insert("aggregates".to_string(), format!("{}", aggr_exprs.len()));
if having.is_some() {
details.insert("having".to_string(), "yes".to_string());
}
let groups = if group_by.is_empty() { 1 } else { input_node.rows / 10 };
Ok(PlanNode {
node_type: "Aggregate".to_string(),
operation: "HashAggregate".to_string(),
cost: input_node.cost + (input_node.rows as f64 * 2.0),
rows: groups,
details,
children: vec![input_node],
})
}
LogicalPlan::Join { left, right, join_type, on, lateral } => {
let left_node = self.plan_to_node(left)?;
let right_node = self.plan_to_node(right)?;
let mut details = HashMap::new();
details.insert("type".to_string(), format!("{:?}", join_type));
if let Some(cond) = on {
details.insert("condition".to_string(), format!("{:?}", cond));
}
if *lateral {
details.insert("lateral".to_string(), "true".to_string());
}
let build_cost = right_node.rows as f64 * 2.0;
let probe_cost = left_node.rows as f64 * 1.0;
let cost = left_node.cost + right_node.cost + build_cost + probe_cost;
let rows = left_node.rows * right_node.rows / 10;
let join_method = if *lateral { "NestedLoop" } else { "HashJoin" };
Ok(PlanNode {
node_type: "Join".to_string(),
operation: format!("{:?} {}", join_type, join_method),
cost,
rows,
details,
children: vec![left_node, right_node],
})
}
LogicalPlan::Sort { input, exprs, asc } => {
let input_node = self.plan_to_node(input)?;
let mut details = HashMap::new();
details.insert("keys".to_string(), format!("{}", exprs.len()));
let n = input_node.rows as f64;
let cost = input_node.cost + (n * n.log2() * 0.01);
Ok(PlanNode {
node_type: "Sort".to_string(),
operation: "Sort".to_string(),
cost,
rows: input_node.rows,
details,
children: vec![input_node],
})
}
LogicalPlan::Limit { input, limit, offset, .. } => {
let input_node = self.plan_to_node(input)?;
let mut details = HashMap::new();
details.insert("limit".to_string(), limit.to_string());
details.insert("offset".to_string(), offset.to_string());
let rows = (*limit).min(input_node.rows.saturating_sub(*offset));
Ok(PlanNode {
node_type: "Limit".to_string(),
operation: "Limit".to_string(),
cost: input_node.cost * 0.1, rows,
details,
children: vec![input_node],
})
}
LogicalPlan::UseBranch { branch_name } => {
let mut details = HashMap::new();
details.insert("branch".to_string(), branch_name.clone());
Ok(PlanNode {
node_type: "UseBranch".to_string(),
operation: format!("USE BRANCH {}", branch_name),
cost: 1.0, rows: 0,
details,
children: vec![],
})
}
LogicalPlan::ShowBranches => {
let details = HashMap::new();
Ok(PlanNode {
node_type: "ShowBranches".to_string(),
operation: "SHOW BRANCHES".to_string(),
cost: 10.0, rows: 10, details,
children: vec![],
})
}
_ => {
Ok(PlanNode {
node_type: "Unknown".to_string(),
operation: format!("{:?}", plan),
cost: 0.0,
rows: 0,
details: HashMap::new(),
children: vec![],
})
}
}
}
fn calculate_total_cost(&self, node: &PlanNode) -> f64 {
node.cost
}
fn estimate_total_rows(&self, node: &PlanNode) -> usize {
node.rows
}
fn detect_features(&self, plan: &LogicalPlan) -> Vec<ActiveFeature> {
let mut features = Vec::new();
if self.has_predicate_pushdown(plan) {
features.push(ActiveFeature {
name: "Predicate Pushdown".to_string(),
category: FeatureCategory::Pushdown,
trigger: "Filter condition detected before scan".to_string(),
benefit: "Reduces rows read from storage".to_string(),
savings_percent: Some(70.0),
});
}
if self.has_projection_pushdown(plan) {
features.push(ActiveFeature {
name: "Projection Pushdown".to_string(),
category: FeatureCategory::Pushdown,
trigger: "Column pruning at scan level".to_string(),
benefit: "Reduces I/O by reading only needed columns".to_string(),
savings_percent: Some(50.0),
});
}
features
}
fn has_predicate_pushdown(&self, plan: &LogicalPlan) -> bool {
matches!(plan,
LogicalPlan::Filter { input, .. } if matches!(**input, LogicalPlan::Scan { .. })
)
}
fn has_projection_pushdown(&self, plan: &LogicalPlan) -> bool {
if let LogicalPlan::Scan { projection, .. } = plan {
projection.is_some()
} else {
false
}
}
fn track_decisions(&self, plan: &LogicalPlan) -> Vec<OptimizerDecision> {
let mut decisions = Vec::new();
if let LogicalPlan::Join { .. } = plan {
decisions.push(OptimizerDecision {
decision_point: "Join Strategy Selection".to_string(),
chosen: ChosenOption {
name: "Hash Join".to_string(),
cost: 1500.0,
rows: 1000,
},
rejected: vec![
RejectedOption {
name: "Nested Loop Join".to_string(),
cost: 10000.0,
reason: "Cartesian product too expensive".to_string(),
cost_multiplier: 6.67,
},
RejectedOption {
name: "Merge Join".to_string(),
cost: 2500.0,
reason: "Requires sort - no suitable index".to_string(),
cost_multiplier: 1.67,
},
],
reasoning: "Hash join chosen for equijoin with unsorted inputs. Build hash table on smaller right input, probe with left.".to_string(),
});
}
if let LogicalPlan::Scan { .. } = plan {
decisions.push(OptimizerDecision {
decision_point: "Scan Strategy Selection".to_string(),
chosen: ChosenOption {
name: "Sequential Scan".to_string(),
cost: 100.0,
rows: 1000,
},
rejected: vec![
RejectedOption {
name: "Index Scan".to_string(),
cost: 150.0,
reason: "No suitable index for predicates".to_string(),
cost_multiplier: 1.5,
},
],
reasoning: "Sequential scan chosen - no matching index found. Table is small enough for full scan.".to_string(),
});
}
decisions
}
fn analyze_why_not(&self, plan: &LogicalPlan, _node: &PlanNode) -> WhyNotAnalysis {
let node = _node;
let mut analysis = WhyNotAnalysis {
unused_indexes: vec![],
stale_statistics: vec![],
cardinality_issues: vec![],
configuration_issues: vec![],
};
if let LogicalPlan::Scan { table_name, .. } = plan {
analysis.unused_indexes.push(UnusedIndexReason {
index_name: format!("idx_{}_id", table_name),
table_name: table_name.clone(),
reason: "Query has no WHERE clause on indexed column".to_string(),
cost_impact: 900.0,
suggestion: "Add WHERE clause on 'id' column to utilize index".to_string(),
});
}
if let LogicalPlan::Scan { table_name, .. } = plan {
analysis.stale_statistics.push(StaleStatsWarning {
table_name: table_name.clone(),
days_old: 15,
percent_changed: 25.0,
impact: "Cardinality estimates may be inaccurate, affecting join order".to_string(),
});
}
if node.rows > 10000 {
analysis.cardinality_issues.push(CardinalityIssue {
operation: node.operation.clone(),
estimated: node.rows,
actual: None,
error_percent: 0.0,
cause: "Estimate based on stale statistics".to_string(),
});
}
if let LogicalPlan::Join { .. } = plan {
analysis.configuration_issues.push(ConfigIssue {
parameter: "work_mem".to_string(),
current_value: "256MB".to_string(),
suggested_value: "512MB".to_string(),
reason: "Hash table for join may spill to disk with current work_mem".to_string(),
});
}
analysis
}
fn generate_ai_explanation(
&self,
plan: &LogicalPlan,
node: &PlanNode,
total_cost: f64,
total_rows: usize
) -> Result<AIExplanation> {
let summary = self.generate_summary(plan, node);
let walkthrough = self.generate_walkthrough(plan, node);
let performance = self.predict_performance(total_cost, total_rows);
let suggestions = self.generate_ai_suggestions(plan, node);
let warnings = self.generate_ai_warnings(plan, node);
Ok(AIExplanation {
summary,
walkthrough,
performance,
suggestions,
warnings,
})
}
fn generate_summary(&self, _plan: &LogicalPlan, node: &PlanNode) -> String {
let plan = _plan;
match plan {
LogicalPlan::Scan { table_name, .. } => {
format!("This query performs a full table scan on '{}', reading approximately {} rows.",
table_name, node.rows)
}
LogicalPlan::Filter { .. } => {
format!("This query filters data, reducing {} rows to approximately {} rows after applying predicates.",
node.children.get(0).map(|c| c.rows).unwrap_or(0), node.rows)
}
LogicalPlan::Join { join_type, .. } => {
format!("This query performs a {:?} join operation, combining data from two sources to produce approximately {} rows.",
join_type, node.rows)
}
LogicalPlan::Aggregate { group_by, .. } => {
if group_by.is_empty() {
"This query computes aggregate functions (SUM, COUNT, etc.) over all rows, producing a single result row.".to_string()
} else {
format!("This query groups data and computes aggregates, producing approximately {} groups.", node.rows)
}
}
_ => format!("This query executes a {} operation.", node.operation),
}
}
fn generate_walkthrough(&self, _plan: &LogicalPlan, node: &PlanNode) -> Vec<String> {
let mut steps = Vec::new();
self.walkthrough_recursive(node, 1, &mut steps);
steps
}
#[allow(clippy::self_only_used_in_recursion)]
fn walkthrough_recursive(&self, node: &PlanNode, step: usize, steps: &mut Vec<String>) {
for (i, child) in node.children.iter().enumerate() {
self.walkthrough_recursive(child, step + i, steps);
}
let step_desc = match node.node_type.as_str() {
"Scan" => {
format!("Step {}: Read {} rows from table {} (Cost: {:.2})",
step, node.rows, node.details.get("table").unwrap_or(&"unknown".to_string()), node.cost)
}
"Filter" => {
format!("Step {}: Filter rows using predicate, keeping {} rows (Cost: {:.2})",
step, node.rows, node.cost)
}
"Join" => {
format!("Step {}: Join {} rows from left input with right input using hash join (Cost: {:.2})",
step, node.rows, node.cost)
}
"Aggregate" => {
format!("Step {}: Group and aggregate data into {} groups (Cost: {:.2})",
step, node.rows, node.cost)
}
"Sort" => {
format!("Step {}: Sort {} rows (Cost: {:.2})",
step, node.rows, node.cost)
}
"Limit" => {
format!("Step {}: Limit output to {} rows (Cost: {:.2})",
step, node.rows, node.cost)
}
_ => {
format!("Step {}: {} (Cost: {:.2})", step, node.operation, node.cost)
}
};
steps.push(step_desc);
}
fn predict_performance(&self, total_cost: f64, _total_rows: usize) -> PerformancePrediction {
let total_rows = _total_rows;
let (category, estimated_time_ms, bottlenecks) = if total_cost < 100.0 {
("Fast", total_cost / 10.0, vec![])
} else if total_cost < 1000.0 {
("Moderate", total_cost / 5.0, vec!["Sequential scan on moderately sized table".to_string()])
} else if total_cost < 10000.0 {
("Slow", total_cost / 2.0, vec![
"Large table scan without index".to_string(),
"Consider adding indexes".to_string(),
])
} else {
("Very Slow", total_cost, vec![
"Expensive join operation".to_string(),
"Possible cartesian product".to_string(),
"Review query structure and indexes".to_string(),
])
};
let explanation = format!(
"This query is predicted to be '{}', taking approximately {:.2}ms to execute. \
It will process approximately {} rows. {}",
category,
estimated_time_ms,
total_rows,
if !bottlenecks.is_empty() {
format!("Main bottlenecks: {}", bottlenecks.join(", "))
} else {
"No significant bottlenecks detected.".to_string()
}
);
PerformancePrediction {
category: category.to_string(),
estimated_time_ms,
bottlenecks,
explanation,
}
}
fn generate_ai_suggestions(&self, plan: &LogicalPlan, node: &PlanNode) -> Vec<String> {
let mut suggestions = Vec::new();
if let LogicalPlan::Scan { table_name, .. } = plan {
if node.rows > 1000 {
suggestions.push(format!(
"Consider adding an index on frequently queried columns in table '{}' to speed up lookups.",
table_name
));
}
}
if let LogicalPlan::Join { .. } = plan {
suggestions.push(
"Ensure join columns are indexed on both tables for optimal performance.".to_string()
);
}
if let LogicalPlan::Sort { .. } = plan {
suggestions.push(
"If sorting by indexed columns, the sort operation might be eliminated by using an index scan.".to_string()
);
}
suggestions
}
fn generate_ai_warnings(&self, plan: &LogicalPlan, node: &PlanNode) -> Vec<String> {
let mut warnings = Vec::new();
if node.cost > 10000.0 {
warnings.push(
"High query cost detected. Consider optimizing predicates or adding indexes.".to_string()
);
}
if node.rows > 100000 {
warnings.push(
"Query will process a large number of rows. Consider adding LIMIT if not all rows are needed.".to_string()
);
}
if let LogicalPlan::Join { .. } = plan {
if node.rows > 1000000 {
warnings.push(
"Large join result detected. Review join conditions to avoid cartesian products.".to_string()
);
}
}
warnings
}
fn generate_warnings_and_suggestions(&self, _plan: &LogicalPlan, node: &PlanNode) -> (Vec<String>, Vec<String>) {
let plan = _plan;
let mut warnings = Vec::new();
let mut suggestions = Vec::new();
if node.cost > 5000.0 {
warnings.push("Query cost is high - execution may be slow".to_string());
}
if node.rows > 50000 {
warnings.push("Large result set - consider adding LIMIT clause".to_string());
}
if matches!(plan, LogicalPlan::Scan { .. }) {
suggestions.push("Consider adding indexes on frequently queried columns".to_string());
}
(warnings, suggestions)
}
pub fn format_output(&self, output: &ExplainOutput) -> String {
match self.format {
ExplainFormat::Text => self.format_text(output),
ExplainFormat::JSON => self.format_json(output),
ExplainFormat::YAML => self.format_yaml(output),
ExplainFormat::Tree => self.format_tree(output),
}
}
fn format_text(&self, output: &ExplainOutput) -> String {
let mut result = String::new();
result.push_str("═══════════════════════════════════════════════════════════════\n");
result.push_str(" EXPLAIN PLAN ANALYSIS \n");
result.push_str("═══════════════════════════════════════════════════════════════\n\n");
result.push_str(&format!("Total Cost: {:.2}\n", output.total_cost));
result.push_str(&format!("Estimated Rows: {}\n", output.total_rows));
result.push_str(&format!("Planning Time: {:.2}ms\n\n", output.planning_time_ms));
if let Some(ai) = &output.ai_explanation {
result.push_str("───────────────────────────────────────────────────────────────\n");
result.push_str(" AI-POWERED EXPLANATION\n");
result.push_str("───────────────────────────────────────────────────────────────\n\n");
result.push_str("Summary:\n");
result.push_str(&format!(" {}\n\n", ai.summary));
if !ai.walkthrough.is_empty() {
result.push_str("Step-by-Step Execution:\n");
for step in &ai.walkthrough {
result.push_str(&format!(" {}\n", step));
}
result.push_str("\n");
}
result.push_str("Performance Prediction:\n");
result.push_str(&format!(" Category: {}\n", ai.performance.category));
result.push_str(&format!(" Estimated Time: {:.2}ms\n", ai.performance.estimated_time_ms));
result.push_str(&format!(" {}\n\n", ai.performance.explanation));
if !ai.suggestions.is_empty() {
result.push_str("AI Suggestions:\n");
for suggestion in &ai.suggestions {
result.push_str(&format!(" • {}\n", suggestion));
}
result.push_str("\n");
}
if !ai.warnings.is_empty() {
result.push_str("AI Warnings:\n");
for warning in &ai.warnings {
result.push_str(&format!(" ⚠ {}\n", warning));
}
result.push_str("\n");
}
}
if !output.features.is_empty() {
result.push_str("───────────────────────────────────────────────────────────────\n");
result.push_str(&format!(" ACTIVE OPTIMIZER FEATURES ({})\n", output.features.len()));
result.push_str("───────────────────────────────────────────────────────────────\n\n");
for feature in &output.features {
result.push_str(&format!("✓ {}\n", feature.name));
result.push_str(&format!(" Category: {:?}\n", feature.category));
result.push_str(&format!(" Trigger: {}\n", feature.trigger));
result.push_str(&format!(" Benefit: {}\n", feature.benefit));
if let Some(savings) = feature.savings_percent {
result.push_str(&format!(" Savings: {:.1}%\n", savings));
}
result.push_str("\n");
}
}
if !output.decisions.is_empty() {
result.push_str("───────────────────────────────────────────────────────────────\n");
result.push_str(&format!(" OPTIMIZER DECISIONS ({})\n", output.decisions.len()));
result.push_str("───────────────────────────────────────────────────────────────\n\n");
for (i, decision) in output.decisions.iter().enumerate() {
result.push_str(&format!("Decision {}: {}\n", i + 1, decision.decision_point));
result.push_str(&format!(" CHOSEN: {} (cost: {:.2}, rows: {})\n",
decision.chosen.name, decision.chosen.cost, decision.chosen.rows));
result.push_str(&format!(" Reasoning: {}\n", decision.reasoning));
if !decision.rejected.is_empty() {
result.push_str(" Rejected alternatives:\n");
for rejected in &decision.rejected {
result.push_str(&format!(" • {} (cost: {:.2}, {:.1}x more expensive)\n",
rejected.name, rejected.cost, rejected.cost_multiplier));
result.push_str(&format!(" Reason: {}\n", rejected.reason));
}
}
result.push_str("\n");
}
}
result.push_str("───────────────────────────────────────────────────────────────\n");
result.push_str(" QUERY EXECUTION PLAN\n");
result.push_str("───────────────────────────────────────────────────────────────\n\n");
result.push_str(&self.format_plan_node(&output.plan, 0));
if let Some(why_not) = &output.why_not {
result.push_str("\n");
result.push_str("═══════════════════════════════════════════════════════════════\n");
result.push_str(" WHY-NOT ANALYSIS \n");
result.push_str("═══════════════════════════════════════════════════════════════\n\n");
if !why_not.unused_indexes.is_empty() {
result.push_str("Unused Indexes:\n");
for idx in &why_not.unused_indexes {
result.push_str(&format!(" • {} on {}\n", idx.index_name, idx.table_name));
result.push_str(&format!(" Reason: {}\n", idx.reason));
result.push_str(&format!(" Cost Impact: {:.2}\n", idx.cost_impact));
result.push_str(&format!(" Suggestion: {}\n\n", idx.suggestion));
}
}
if !why_not.stale_statistics.is_empty() {
result.push_str("Stale Statistics:\n");
for stat in &why_not.stale_statistics {
result.push_str(&format!(" • Table: {}\n", stat.table_name));
result.push_str(&format!(" Age: {} days old\n", stat.days_old));
result.push_str(&format!(" Changed: {:.1}%\n", stat.percent_changed));
result.push_str(&format!(" Impact: {}\n\n", stat.impact));
}
}
if !why_not.configuration_issues.is_empty() {
result.push_str("Configuration Issues:\n");
for issue in &why_not.configuration_issues {
result.push_str(&format!(" • Parameter: {}\n", issue.parameter));
result.push_str(&format!(" Current: {}\n", issue.current_value));
result.push_str(&format!(" Suggested: {}\n", issue.suggested_value));
result.push_str(&format!(" Reason: {}\n\n", issue.reason));
}
}
}
if !output.warnings.is_empty() {
result.push_str("Warnings:\n");
for warning in &output.warnings {
result.push_str(&format!(" ⚠ {}\n", warning));
}
result.push_str("\n");
}
if !output.suggestions.is_empty() {
result.push_str("Suggestions:\n");
for suggestion in &output.suggestions {
result.push_str(&format!(" 💡 {}\n", suggestion));
}
result.push_str("\n");
}
result.push_str("═══════════════════════════════════════════════════════════════\n");
result
}
#[allow(clippy::self_only_used_in_recursion)]
fn format_plan_node(&self, node: &PlanNode, depth: usize) -> String {
let indent = " ".repeat(depth);
let mut result = String::new();
result.push_str(&format!("{}→ {} [cost={:.2}, rows={}]\n",
indent, node.operation, node.cost, node.rows));
if !node.details.is_empty() {
for (key, value) in &node.details {
result.push_str(&format!("{} {}: {}\n", indent, key, value));
}
}
for child in &node.children {
result.push_str(&self.format_plan_node(child, depth + 1));
}
result
}
fn format_json(&self, output: &ExplainOutput) -> String {
serde_json::to_string_pretty(output).unwrap_or_else(|_| "{}".to_string())
}
fn format_yaml(&self, output: &ExplainOutput) -> String {
serde_yaml::to_string(output).unwrap_or_else(|_| "---\n".to_string())
}
fn format_tree(&self, output: &ExplainOutput) -> String {
self.format_text(output)
}
fn get_table_row_count(&self, table_name: &str) -> usize {
if let Some(storage) = &self.storage {
let catalog = storage.catalog();
if let Ok(Some(stats)) = catalog.get_table_statistics(table_name) {
return stats.row_count as usize;
}
}
1000
}
fn get_table_avg_row_size(&self, table_name: &str) -> usize {
if let Some(storage) = &self.storage {
let catalog = storage.catalog();
if let Ok(Some(stats)) = catalog.get_table_statistics(table_name) {
return stats.avg_row_size as usize;
}
}
100
}
fn estimate_predicate_selectivity(&self, predicate: &super::logical_plan::LogicalExpr) -> f64 {
use super::logical_plan::{LogicalExpr, BinaryOperator, UnaryOperator};
match predicate {
LogicalExpr::BinaryExpr { left, op, right } => {
match op {
BinaryOperator::Eq => {
if let LogicalExpr::Column { name, .. } = left.as_ref() {
return self.estimate_column_equality_selectivity(name);
}
0.1 }
BinaryOperator::Lt | BinaryOperator::Gt | BinaryOperator::LtEq | BinaryOperator::GtEq => {
if let LogicalExpr::Column { name, .. } = left.as_ref() {
return self.estimate_column_range_selectivity(name);
}
0.33 }
BinaryOperator::And => {
let left_sel = self.estimate_predicate_selectivity(left);
let right_sel = self.estimate_predicate_selectivity(right);
left_sel * right_sel
}
BinaryOperator::Or => {
let left_sel = self.estimate_predicate_selectivity(left);
let right_sel = self.estimate_predicate_selectivity(right);
left_sel + right_sel - (left_sel * right_sel)
}
_ => 0.1 }
}
LogicalExpr::UnaryExpr { op, expr } => {
match op {
UnaryOperator::Not => {
let inner_sel = self.estimate_predicate_selectivity(expr);
1.0 - inner_sel
}
_ => 0.1 }
}
LogicalExpr::IsNull { expr, is_null } => {
if let LogicalExpr::Column { name, .. } = expr.as_ref() {
let null_sel = self.estimate_column_null_selectivity(name);
if *is_null {
null_sel
} else {
1.0 - null_sel
}
} else if *is_null {
0.05 } else {
0.95 }
}
_ => 0.1 }
}
fn estimate_column_equality_selectivity(&self, column_name: &str) -> f64 {
if let Some(storage) = &self.storage {
let catalog = storage.catalog();
if let Ok(tables) = catalog.list_tables() {
for table in tables {
if let Ok(Some(stats)) = catalog.get_table_statistics(&table) {
if let Some(col_stats) = stats.columns.get(column_name) {
return col_stats.estimate_equality_selectivity(&crate::Value::Null);
}
}
}
}
}
0.01
}
fn estimate_column_range_selectivity(&self, column_name: &str) -> f64 {
if let Some(storage) = &self.storage {
let catalog = storage.catalog();
if let Ok(tables) = catalog.list_tables() {
for table in tables {
if let Ok(Some(stats)) = catalog.get_table_statistics(&table) {
if let Some(col_stats) = stats.columns.get(column_name) {
return col_stats.estimate_range_selectivity(&crate::Value::Null, "<");
}
}
}
}
}
0.33
}
fn estimate_column_null_selectivity(&self, column_name: &str) -> f64 {
if let Some(storage) = &self.storage {
let catalog = storage.catalog();
if let Ok(tables) = catalog.list_tables() {
for table in tables {
if let Ok(Some(stats)) = catalog.get_table_statistics(&table) {
if let Some(col_stats) = stats.columns.get(column_name) {
return col_stats.estimate_null_selectivity();
}
}
}
}
}
0.05
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::{Schema, Column, DataType};
use std::sync::Arc;
#[test]
fn test_explain_basic_scan() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
Column {
name: "name".to_string(),
data_type: DataType::Text,
nullable: false,
primary_key: false,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let plan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: None,
as_of: None,
};
let planner = ExplainPlanner::new(ExplainMode::Standard, ExplainFormat::Text);
let output = planner.explain(&plan).unwrap();
assert!(output.total_cost > 0.0);
assert!(output.total_rows > 0);
assert_eq!(output.plan.node_type, "Scan");
}
#[test]
fn test_explain_with_filter() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let scan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: None,
as_of: None,
};
let plan = LogicalPlan::Filter {
input: Box::new(scan),
predicate: super::super::logical_plan::LogicalExpr::Column {
table: None,
name: "id".to_string(),
},
};
let planner = ExplainPlanner::new(ExplainMode::Verbose, ExplainFormat::Text);
let output = planner.explain(&plan).unwrap();
assert_eq!(output.plan.node_type, "Filter");
assert!(output.plan.children.len() == 1);
}
#[test]
fn test_explain_ai_mode() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let plan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: None,
as_of: None,
};
let planner = ExplainPlanner::new(ExplainMode::AI, ExplainFormat::Text);
let output = planner.explain(&plan).unwrap();
assert!(output.ai_explanation.is_some());
let ai = output.ai_explanation.unwrap();
assert!(!ai.summary.is_empty());
assert!(!ai.walkthrough.is_empty());
}
#[test]
fn test_explain_analyze_mode() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let plan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: None,
as_of: None,
};
let planner = ExplainPlanner::new(ExplainMode::Analyze, ExplainFormat::Text);
let output = planner.explain(&plan).unwrap();
assert!(output.why_not.is_some());
assert!(output.ai_explanation.is_some());
}
#[test]
fn test_feature_detection() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let scan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: Some(vec![0]),
as_of: None,
};
let plan = LogicalPlan::Filter {
input: Box::new(scan),
predicate: super::super::logical_plan::LogicalExpr::Column {
table: None,
name: "id".to_string(),
},
};
let planner = ExplainPlanner::new(ExplainMode::Verbose, ExplainFormat::Text);
let output = planner.explain(&plan).unwrap();
assert!(!output.features.is_empty());
}
#[test]
fn test_format_json() {
let schema = Arc::new(Schema {
columns: vec![
Column {
name: "id".to_string(),
data_type: DataType::Int4,
nullable: false,
primary_key: true,
source_table: None,
source_table_name: None,
default_expr: None,
unique: false,
storage_mode: crate::ColumnStorageMode::Default,
},
],
});
let plan = LogicalPlan::Scan {
table_name: "users".to_string(),
alias: None,
schema,
projection: None,
as_of: None,
};
let planner = ExplainPlanner::new(ExplainMode::Standard, ExplainFormat::JSON);
let output = planner.explain(&plan).unwrap();
let json = planner.format_output(&output);
assert!(json.contains("\"plan\""));
assert!(json.contains("\"total_cost\""));
}
}