kaccy_db/
query_plan.rs

1//! Query execution plan analysis utilities
2//!
3//! This module provides utilities for analyzing PostgreSQL query execution plans
4//! to identify performance issues and optimization opportunities.
5
6use crate::error::Result;
7use serde::{Deserialize, Serialize};
8use sqlx::PgPool;
9
10/// Query execution plan node
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct QueryPlanNode {
13    /// Node type (e.g., "Seq Scan", "Index Scan", "Hash Join")
14    pub node_type: String,
15    /// Relation/table name if applicable
16    pub relation: Option<String>,
17    /// Startup cost estimate
18    pub startup_cost: f64,
19    /// Total cost estimate
20    pub total_cost: f64,
21    /// Estimated number of rows
22    pub rows: i64,
23    /// Estimated width of rows in bytes
24    pub width: i32,
25    /// Actual time to get first row (if EXPLAIN ANALYZE)
26    pub actual_startup_time: Option<f64>,
27    /// Actual total time (if EXPLAIN ANALYZE)
28    pub actual_total_time: Option<f64>,
29    /// Actual number of rows (if EXPLAIN ANALYZE)
30    pub actual_rows: Option<i64>,
31}
32
33/// Query execution plan analysis result
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct QueryPlan {
36    /// The SQL query that was analyzed
37    pub query: String,
38    /// Total estimated cost
39    pub total_cost: f64,
40    /// Estimated number of rows
41    pub estimated_rows: i64,
42    /// Planning time in milliseconds (if EXPLAIN ANALYZE)
43    pub planning_time: Option<f64>,
44    /// Execution time in milliseconds (if EXPLAIN ANALYZE)
45    pub execution_time: Option<f64>,
46    /// Full plan text
47    pub plan_text: String,
48    /// Performance warnings
49    pub warnings: Vec<String>,
50}
51
52/// Analyze a query's execution plan without actually executing it
53///
54/// Uses EXPLAIN to show the query plan. Useful for analyzing expensive queries
55/// before running them.
56pub async fn explain_query(pool: &PgPool, query: &str) -> Result<QueryPlan> {
57    let explain_query = format!("EXPLAIN (FORMAT JSON, VERBOSE) {}", query);
58
59    let plan_json: serde_json::Value = sqlx::query_scalar(&explain_query).fetch_one(pool).await?;
60
61    parse_plan(query, &plan_json, false)
62}
63
64/// Analyze a query's execution plan by actually running it
65///
66/// Uses EXPLAIN ANALYZE to show the actual query execution statistics.
67/// WARNING: This will execute the query, so use with caution on
68/// INSERT/UPDATE/DELETE queries. Consider wrapping in a transaction
69/// that you roll back.
70pub async fn explain_analyze_query(pool: &PgPool, query: &str) -> Result<QueryPlan> {
71    let explain_query = format!("EXPLAIN (ANALYZE, FORMAT JSON, VERBOSE, BUFFERS) {}", query);
72
73    let plan_json: serde_json::Value = sqlx::query_scalar(&explain_query).fetch_one(pool).await?;
74
75    parse_plan(query, &plan_json, true)
76}
77
78/// Parse the JSON plan output from PostgreSQL
79fn parse_plan(query: &str, plan_json: &serde_json::Value, is_analyze: bool) -> Result<QueryPlan> {
80    let plan_array = plan_json.as_array().ok_or_else(|| {
81        crate::error::DbError::Other("Invalid plan JSON: expected array".to_string())
82    })?;
83
84    let plan = plan_array.first().ok_or_else(|| {
85        crate::error::DbError::Other("Invalid plan JSON: empty array".to_string())
86    })?;
87
88    let plan_obj = plan
89        .get("Plan")
90        .ok_or_else(|| crate::error::DbError::Other("No Plan object found".to_string()))?;
91
92    let total_cost = plan_obj
93        .get("Total Cost")
94        .and_then(|v| v.as_f64())
95        .unwrap_or(0.0);
96
97    let estimated_rows = plan_obj
98        .get("Plan Rows")
99        .and_then(|v| v.as_i64())
100        .unwrap_or(0);
101
102    let planning_time = if is_analyze {
103        plan.get("Planning Time").and_then(|v| v.as_f64())
104    } else {
105        None
106    };
107
108    let execution_time = if is_analyze {
109        plan.get("Execution Time").and_then(|v| v.as_f64())
110    } else {
111        None
112    };
113
114    // Convert plan to formatted text
115    let plan_text = serde_json::to_string_pretty(plan)
116        .map_err(|e| crate::error::DbError::Other(format!("Failed to serialize plan: {}", e)))?;
117
118    // Generate warnings based on plan characteristics
119    let mut warnings = Vec::new();
120
121    if let Some(node_type) = plan_obj.get("Node Type").and_then(|v| v.as_str()) {
122        if node_type.contains("Seq Scan") {
123            warnings.push("Sequential scan detected - consider adding an index".to_string());
124        }
125    }
126
127    if total_cost > 1000.0 {
128        warnings.push(format!(
129            "High estimated cost ({:.2}) - query may be expensive",
130            total_cost
131        ));
132    }
133
134    if let Some(exec_time) = execution_time {
135        if exec_time > 1000.0 {
136            warnings.push(format!("Slow query: execution took {:.2}ms", exec_time));
137        }
138    }
139
140    Ok(QueryPlan {
141        query: query.to_string(),
142        total_cost,
143        estimated_rows,
144        planning_time,
145        execution_time,
146        plan_text,
147        warnings,
148    })
149}
150
151/// Get the top N most expensive queries from pg_stat_statements
152///
153/// Requires the pg_stat_statements extension to be installed.
154/// Returns queries sorted by total execution time.
155pub async fn get_expensive_queries(pool: &PgPool, limit: i64) -> Result<Vec<ExpensiveQuery>> {
156    let queries = sqlx::query_as::<_, (String, i64, f64, f64, f64)>(
157        r#"
158        SELECT
159            query,
160            calls,
161            total_exec_time,
162            mean_exec_time,
163            stddev_exec_time
164        FROM pg_stat_statements
165        WHERE query NOT LIKE '%pg_stat_statements%'
166        ORDER BY total_exec_time DESC
167        LIMIT $1
168        "#,
169    )
170    .bind(limit)
171    .fetch_all(pool)
172    .await;
173
174    match queries {
175        Ok(rows) => Ok(rows
176            .into_iter()
177            .map(|r| ExpensiveQuery {
178                query: r.0,
179                calls: r.1,
180                total_time: r.2,
181                mean_time: r.3,
182                stddev_time: r.4,
183            })
184            .collect()),
185        Err(_) => {
186            // pg_stat_statements might not be installed
187            Ok(Vec::new())
188        }
189    }
190}
191
192/// Information about an expensive query from pg_stat_statements
193#[derive(Debug, Clone, Serialize, Deserialize)]
194pub struct ExpensiveQuery {
195    /// The query text
196    pub query: String,
197    /// Number of times executed
198    pub calls: i64,
199    /// Total execution time in milliseconds
200    pub total_time: f64,
201    /// Mean execution time in milliseconds
202    pub mean_time: f64,
203    /// Standard deviation of execution time
204    pub stddev_time: f64,
205}
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210
211    #[test]
212    fn test_query_plan_structure() {
213        let plan = QueryPlan {
214            query: "SELECT * FROM users".to_string(),
215            total_cost: 100.0,
216            estimated_rows: 1000,
217            planning_time: Some(0.5),
218            execution_time: Some(10.0),
219            plan_text: "{}".to_string(),
220            warnings: vec!["Sequential scan".to_string()],
221        };
222
223        assert_eq!(plan.total_cost, 100.0);
224        assert_eq!(plan.warnings.len(), 1);
225    }
226
227    #[test]
228    fn test_expensive_query_structure() {
229        let query = ExpensiveQuery {
230            query: "SELECT * FROM large_table".to_string(),
231            calls: 1000,
232            total_time: 5000.0,
233            mean_time: 5.0,
234            stddev_time: 1.0,
235        };
236
237        assert_eq!(query.calls, 1000);
238        assert_eq!(query.mean_time, 5.0);
239    }
240
241    #[test]
242    fn test_query_plan_serialization() {
243        let plan = QueryPlan {
244            query: "SELECT id FROM users".to_string(),
245            total_cost: 50.0,
246            estimated_rows: 100,
247            planning_time: None,
248            execution_time: None,
249            plan_text: "test".to_string(),
250            warnings: vec![],
251        };
252
253        let json = serde_json::to_string(&plan).unwrap();
254        let deserialized: QueryPlan = serde_json::from_str(&json).unwrap();
255
256        assert_eq!(deserialized.query, plan.query);
257        assert_eq!(deserialized.total_cost, plan.total_cost);
258    }
259
260    #[test]
261    fn test_query_plan_node_structure() {
262        let node = QueryPlanNode {
263            node_type: "Index Scan".to_string(),
264            relation: Some("users".to_string()),
265            startup_cost: 0.0,
266            total_cost: 10.0,
267            rows: 100,
268            width: 32,
269            actual_startup_time: Some(0.1),
270            actual_total_time: Some(1.0),
271            actual_rows: Some(95),
272        };
273
274        assert_eq!(node.node_type, "Index Scan");
275        assert_eq!(node.relation, Some("users".to_string()));
276        assert_eq!(node.rows, 100);
277    }
278
279    #[test]
280    fn test_expensive_query_serialization() {
281        let query = ExpensiveQuery {
282            query: "SELECT * FROM orders".to_string(),
283            calls: 500,
284            total_time: 2500.0,
285            mean_time: 5.0,
286            stddev_time: 0.5,
287        };
288
289        let json = serde_json::to_string(&query).unwrap();
290        let deserialized: ExpensiveQuery = serde_json::from_str(&json).unwrap();
291
292        assert_eq!(deserialized.calls, query.calls);
293        assert_eq!(deserialized.total_time, query.total_time);
294    }
295}