// schema_risk/recommendation.rs

1//! Rule-based fix suggestion engine.
2//!
3//! Given a list of parsed SQL statements and optional row-count metadata,
4//! this module produces structured `FixSuggestion` objects — each with a
5//! corrected SQL snippet or a step-by-step zero-downtime migration plan.
6//!
7//! ## Rules implemented
8//!
9//! | ID   | Trigger                              | Severity |
10//! |------|--------------------------------------|----------|
11//! | R01  | CREATE INDEX without CONCURRENTLY    | Blocking |
12//! | R02  | ADD COLUMN NOT NULL without DEFAULT  | Blocking |
13//! | R03  | DROP COLUMN on large table           | Warning  |
14//! | R04  | Missing index on foreign key column  | Warning  |
15//! | R05  | RENAME COLUMN                        | Blocking |
16//! | R06  | RENAME TABLE                         | Blocking |
17//! | R07  | ALTER COLUMN TYPE on large table     | Blocking |
18//! | R08  | Long ACCESS EXCLUSIVE lock duration  | Warning  |
19
20use crate::parser::ParsedStatement;
21use serde::{Deserialize, Serialize};
22use std::collections::HashMap;
23
24// ─────────────────────────────────────────────
25// Core types
26// ─────────────────────────────────────────────
27
28/// Severity of a fix suggestion — used for ordering and exit-code decisions.
/// Severity of a fix suggestion — used for ordering and exit-code decisions.
///
/// Variant order matters: the derived `Ord` ranks `Info < Warning < Blocking`,
/// and `suggest_fixes` sorts descending on this ordering so the most severe
/// findings appear first in reports.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FixSeverity {
    /// A nice-to-have improvement.
    Info,
    /// May cause degraded performance or brief downtime on large tables.
    Warning,
    /// Will likely cause production downtime, data loss, or a failed migration.
    Blocking,
}
39
40impl std::fmt::Display for FixSeverity {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            FixSeverity::Info => write!(f, "INFO"),
44            FixSeverity::Warning => write!(f, "WARNING"),
45            FixSeverity::Blocking => write!(f, "BLOCKING"),
46        }
47    }
48}
49
50/// A single recommended fix for a risky migration operation.
/// A single recommended fix for a risky migration operation.
///
/// Serialized via serde for report output (output format is decided by the
/// caller); field names are part of that external contract.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixSuggestion {
    /// Short rule identifier, e.g. "R01".
    pub rule_id: String,
    /// Short, actionable title shown in the terminal and CI comments.
    pub title: String,
    /// Full explanation: what will go wrong, and why the fix helps.
    pub explanation: String,
    /// Drop-in replacement SQL (if the fix is a single-statement rewrite).
    pub fixed_sql: Option<String>,
    /// Ordered migration steps for complex zero-downtime patterns.
    /// Typically exactly one of `fixed_sql` / `migration_steps` is `Some`.
    pub migration_steps: Option<Vec<String>>,
    /// How serious this finding is.
    pub severity: FixSeverity,
    /// Link to relevant PostgreSQL documentation or best-practices guide.
    pub docs_url: Option<String>,
    /// `true` when `apply_fixes()` can mechanically patch the raw SQL text.
    pub auto_fixable: bool,
}
70
71// ─────────────────────────────────────────────
72// Public API
73// ─────────────────────────────────────────────
74
75/// Analyse a slice of parsed statements and return a de-duplicated,
76/// severity-sorted list of `FixSuggestion`s.
77///
78/// `row_counts` maps table name → estimated row count. May be empty in
79/// offline mode; rules will fall back to heuristic thresholds.
80pub fn suggest_fixes(
81    statements: &[ParsedStatement],
82    row_counts: &HashMap<String, u64>,
83) -> Vec<FixSuggestion> {
84    let mut suggestions: Vec<FixSuggestion> = Vec::new();
85
86    for stmt in statements {
87        match stmt {
88            // ── R01: CREATE INDEX without CONCURRENTLY ────────────────────
89            ParsedStatement::CreateIndex {
90                table,
91                columns,
92                concurrently,
93                index_name,
94                unique,
95            } => {
96                if !concurrently {
97                    let rows = row_counts.get(table).copied().unwrap_or(0);
98                    let col_list = columns.join(", ");
99                    let unique_kw = if *unique { "UNIQUE " } else { "" };
100                    let name = index_name.as_deref().unwrap_or("idx_name");
101                    suggestions.push(rule_r01_index_concurrently(
102                        table, &col_list, unique_kw, name, rows,
103                    ));
104                }
105            }
106
107            // ── R02 + R09: ADD COLUMN — NOT NULL without DEFAULT / DEFAULT on old PG ──
108            ParsedStatement::AlterTableAddColumn { table, column } => {
109                let rows = row_counts.get(table).copied().unwrap_or(0);
110                // R02: NOT NULL without any DEFAULT will fail on non-empty tables
111                if !column.nullable && !column.has_default {
112                    suggestions.push(rule_r02_add_not_null(
113                        table,
114                        &column.name,
115                        &column.data_type,
116                        rows,
117                    ));
118                }
119                // R09: Adding a column WITH DEFAULT — safe on PG11+, table-rewrite on PG10-
120                if column.has_default {
121                    suggestions.push(rule_r09_add_column_default(
122                        table,
123                        &column.name,
124                        &column.data_type,
125                        rows,
126                    ));
127                }
128            }
129
130            // ── R03: DROP COLUMN ──────────────────────────────────────────
131            ParsedStatement::AlterTableDropColumn { table, column, .. } => {
132                let rows = row_counts.get(table).copied().unwrap_or(0);
133                suggestions.push(rule_r03_drop_column(table, column, rows));
134            }
135
136            // ── R04: ADD FOREIGN KEY without index on FK column ───────────
137            ParsedStatement::AlterTableAddForeignKey { table, fk } => {
138                suggestions.push(rule_r04_missing_fk_index(table, &fk.columns));
139            }
140
141            // ── R05: RENAME COLUMN ────────────────────────────────────────
142            ParsedStatement::AlterTableRenameColumn { table, old, new } => {
143                suggestions.push(rule_r05_rename_column(table, old, new));
144            }
145
146            // ── R06: RENAME TABLE ─────────────────────────────────────────
147            ParsedStatement::AlterTableRenameTable { old, new } => {
148                suggestions.push(rule_r06_rename_table(old, new));
149            }
150
151            // ── R07: ALTER COLUMN TYPE on a non-trivial table ─────────────
152            ParsedStatement::AlterTableAlterColumnType {
153                table,
154                column,
155                new_type,
156            } => {
157                let rows = row_counts.get(table).copied().unwrap_or(0);
158                // Always emit for type changes — even small tables deserve a warning
159                suggestions.push(rule_r07_alter_column_type(table, column, new_type, rows));
160            }
161
162            // ── R08: Long ACCESS EXCLUSIVE lock (catch-all for other DDL) ─
163            ParsedStatement::DropTable { tables, .. } => {
164                for t in tables {
165                    let rows = row_counts.get(t).copied().unwrap_or(0);
166                    let est = estimate_lock_secs(rows);
167                    if est > 5 {
168                        suggestions.push(rule_r08_long_lock(&format!("DROP TABLE {t}"), est));
169                    }
170                }
171            }
172
173            _ => {}
174        }
175    }
176
177    // Sort so most severe issues appear first
178    suggestions.sort_by(|a, b| b.severity.cmp(&a.severity));
179    suggestions
180}
181
182/// Rewrite the raw SQL text, applying all auto-fixable suggestions in-place.
183///
184/// Currently handles:
185/// - **R01**: Inserts `CONCURRENTLY` after `CREATE [UNIQUE] INDEX`
186///
187/// **B-04 fix**: If a `CREATE INDEX` is inside a `BEGIN` / `COMMIT` transaction
188/// block, inserts a warning comment instead of adding `CONCURRENTLY`, because
189/// PostgreSQL forbids `CREATE INDEX CONCURRENTLY` inside a transaction.
190///
191/// Returns the modified SQL string. Lines not matched by any rule are
192/// passed through unchanged.
193pub fn apply_fixes(sql: &str, suggestions: &[FixSuggestion]) -> String {
194    let has_r01 = suggestions
195        .iter()
196        .any(|s| s.rule_id == "R01" && s.auto_fixable);
197    if !has_r01 {
198        return sql.to_string();
199    }
200    if is_inside_transaction_block(sql) {
201        // B-04: Cannot use CONCURRENTLY inside a transaction — emit a comment
202        return rewrite_index_concurrent_in_txn(sql);
203    }
204    rewrite_index_concurrent(sql)
205}
206
/// Returns `true` if the SQL text contains an explicit transaction block:
/// a `BEGIN` / `START TRANSACTION` opener together with a `COMMIT` or
/// `ROLLBACK` anywhere in the script.
///
/// Matching is token-based (whole words only), so identifiers such as
/// `beginning` or `commit_hash` do not cause false positives. This remains
/// a file-level heuristic: it does not verify that any particular statement
/// actually sits between the opener and the terminator.
fn is_inside_transaction_block(sql: &str) -> bool {
    let upper = sql.to_uppercase();
    // Split on anything that cannot be part of a SQL identifier/keyword.
    let words: Vec<&str> = upper
        .split(|c: char| !c.is_ascii_alphanumeric() && c != '_')
        .filter(|w| !w.is_empty())
        .collect();
    let mut has_begin = false;
    let mut has_commit = false;
    for (i, word) in words.iter().enumerate() {
        match *word {
            "BEGIN" => has_begin = true,
            // `START TRANSACTION` is two tokens; require them to be adjacent.
            "START" if words.get(i + 1).copied() == Some("TRANSACTION") => has_begin = true,
            "COMMIT" | "ROLLBACK" => has_commit = true,
            _ => {}
        }
    }
    has_begin && has_commit
}
215
/// When CONCURRENTLY cannot be added (the script runs inside a transaction),
/// prepend an explanatory comment pair to each offending CREATE INDEX line.
///
/// Lines that already use CONCURRENTLY, and all non-index lines, are copied
/// through untouched.
fn rewrite_index_concurrent_in_txn(sql: &str) -> String {
    let mut out: Vec<String> = Vec::new();
    for line in sql.lines() {
        let upper = line.to_uppercase();
        let needs_warning = (upper.contains("CREATE INDEX")
            || upper.contains("CREATE UNIQUE INDEX"))
            && !upper.contains("CONCURRENTLY");
        if needs_warning {
            out.push(
                "-- ⚠️  schemarisk: Cannot use CONCURRENTLY inside a transaction block."
                    .to_string(),
            );
            out.push("-- Remove BEGIN/COMMIT wrapper and run this statement standalone.".to_string());
        }
        out.push(line.to_string());
    }
    out.join("\n")
}
238
239// ─────────────────────────────────────────────
240// Rule implementations
241// ─────────────────────────────────────────────
242
/// R01 — CREATE INDEX without CONCURRENTLY.
///
/// * `columns` — pre-joined, comma-separated column list.
/// * `unique_kw` — either `"UNIQUE "` (with trailing space) or `""`.
/// * `name` — index name, or a placeholder when the statement was anonymous.
/// * `rows` — estimated row count; 0 means unknown (note is then omitted).
fn rule_r01_index_concurrently(
    table: &str,
    columns: &str,
    unique_kw: &str,
    name: &str,
    rows: u64,
) -> FixSuggestion {
    // Build-time estimate assumes ~500k rows/second; "+ 1" rounds up so a
    // non-empty table never reports 0 seconds.
    let rows_note = if rows > 0 {
        format!(
            " The table has approximately {} rows — index build will take roughly {} seconds.",
            fmt_rows(rows),
            rows / 500_000 + 1
        )
    } else {
        String::new()
    };
    FixSuggestion {
        rule_id: "R01".to_string(),
        title: "Use CREATE INDEX CONCURRENTLY to avoid blocking writes".to_string(),
        explanation: format!(
            "CREATE INDEX without CONCURRENTLY acquires a SHARE lock that blocks all \
             INSERT, UPDATE, and DELETE statements for the entire duration of the index \
             build.{rows_note} Using CONCURRENTLY performs two table scans and allows \
             DML to continue throughout, at the cost of a longer total build time. \
             Note: CONCURRENTLY cannot run inside a transaction block."
        ),
        fixed_sql: Some(format!(
            "CREATE {unique_kw}INDEX CONCURRENTLY {name}\n  ON {table}({columns});"
        )),
        migration_steps: None,
        severity: FixSeverity::Blocking,
        docs_url: Some(
            "https://www.postgresql.org/docs/current/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY"
                .to_string(),
        ),
        // The only rule `apply_fixes()` can patch mechanically.
        auto_fixable: true,
    }
}
282
283/// R02 — ADD COLUMN NOT NULL without DEFAULT.
284fn rule_r02_add_not_null(table: &str, column: &str, data_type: &str, rows: u64) -> FixSuggestion {
285    let rows_note = if rows > 0 {
286        format!(" (~{} rows)", fmt_rows(rows))
287    } else {
288        String::new()
289    };
290    FixSuggestion {
291        rule_id: "R02".to_string(),
292        title: format!(
293            "Adding NOT NULL column '{column}' without DEFAULT will fail on non-empty tables"
294        ),
295        explanation: format!(
296            "ALTER TABLE {table}{rows_note} ADD COLUMN {column} {data_type} NOT NULL \
297             fails immediately if the table contains any existing rows — PostgreSQL \
298             cannot assign a value to the new column for those rows. Use the \
299             three-step pattern below for a zero-downtime migration that works on \
300             PostgreSQL 11+ (which stores the DEFAULT without rewriting the table)."
301        ),
302        fixed_sql: None,
303        migration_steps: Some(vec![
304            "-- Step 1: Add the column with a sensible DEFAULT (instant on PG ≥ 11 for constant defaults)".to_string(),
305            format!("ALTER TABLE {table}"),
306            format!("  ADD COLUMN {column} {data_type} NOT NULL DEFAULT 'YOUR_DEFAULT_VALUE';"),
307            String::new(),
308            "-- Step 2: Back-fill outdated rows in batches (avoids long lock)".to_string(),
309            format!("-- Run this in a loop until 0 rows are updated:"),
310            format!("WITH batch AS ("),
311            format!("  SELECT ctid FROM {table}"),
312            format!("  WHERE {column} = 'YOUR_DEFAULT_VALUE'"),
313            format!("  LIMIT 10000"),
314            format!(")"),
315            format!("UPDATE {table} SET {column} = 'YOUR_REAL_VALUE'"),
316            format!("WHERE ctid IN (SELECT ctid FROM batch);"),
317            String::new(),
318            "-- Step 3: Remove the synthetic default (optional) once all rows are back-filled".to_string(),
319            format!("ALTER TABLE {table} ALTER COLUMN {column} DROP DEFAULT;"),
320        ]),
321        severity: FixSeverity::Blocking,
322        docs_url: Some(
323            "https://www.postgresql.org/docs/current/sql-altertable.html".to_string(),
324        ),
325        auto_fixable: false,
326    }
327}
328
329/// R03 — DROP COLUMN; advise two-phase deployment.
330fn rule_r03_drop_column(table: &str, column: &str, rows: u64) -> FixSuggestion {
331    let rows_note = if rows > 1_000_000 {
332        format!(" The table has ~{} rows.", fmt_rows(rows))
333    } else {
334        String::new()
335    };
336    let severity = if rows > 1_000_000 {
337        FixSeverity::Blocking
338    } else {
339        FixSeverity::Warning
340    };
341    FixSuggestion {
342        rule_id: "R03".to_string(),
343        title: format!("Deploy app changes before dropping column '{column}' from '{table}'"),
344        explanation: format!(
345            "DROP COLUMN is irreversible and holds an ACCESS EXCLUSIVE lock for the \
346             duration of the catalog update.{rows_note}  Any in-flight query or \
347             transaction referencing this column will fail.  Use the two-phase \
348             deployment pattern to ensure zero application downtime."
349        ),
350        fixed_sql: None,
351        migration_steps: Some(vec![
352            format!("-- Phase 1 — Application deploy (no DB change yet)"),
353            format!("-- Remove all code that reads or writes '{column}' from '{table}'"),
354            format!("-- Verify no ORM model, query, or migration references this column"),
355            String::new(),
356            format!("-- Phase 2 — Run this migration AFTER the app is fully deployed"),
357            format!("ALTER TABLE {table} DROP COLUMN IF EXISTS {column};"),
358            String::new(),
359            format!("-- Optional: create a backup before dropping"),
360            format!("-- CREATE TABLE {table}_{column}_backup AS"),
361            format!("--   SELECT id, {column} FROM {table};"),
362        ]),
363        severity,
364        docs_url: Some("https://www.postgresql.org/docs/current/sql-altertable.html".to_string()),
365        auto_fixable: false,
366    }
367}
368
/// R04 — ADD FOREIGN KEY with no index on the referencing column.
///
/// * `fk_columns` — the referencing column names, in constraint order.
///
/// NOTE(review): this fires for every ADD FOREIGN KEY statement; an existing
/// covering index on the FK column(s) is not detected from this single
/// statement — confirm whether upstream filtering handles that case.
fn rule_r04_missing_fk_index(table: &str, fk_columns: &[String]) -> FixSuggestion {
    // "a, b" for the SQL column list; "a_b" for the generated index name.
    let col_list = fk_columns.join(", ");
    let col_snake = fk_columns.join("_");
    FixSuggestion {
        rule_id: "R04".to_string(),
        title: format!("Add an index on FK column(s) ({col_list}) to prevent sequential scans"),
        explanation: format!(
            "PostgreSQL does NOT automatically create an index on foreign key columns. \
             Without an index on '{table}.({col_list})', every DELETE or UPDATE on the \
             referenced parent table triggers a full sequential scan of '{table}' to \
             check referential integrity. This is catastrophic on tables larger than \
             10k rows and grows linearly with table size."
        ),
        fixed_sql: Some(format!(
            "CREATE INDEX CONCURRENTLY idx_{table}_{col_snake}\n  ON {table}({col_list});"
        )),
        migration_steps: None,
        severity: FixSeverity::Warning,
        docs_url: Some("https://www.postgresql.org/docs/current/indexes-intro.html".to_string()),
        auto_fixable: false,
    }
}
392
393/// R05 — RENAME COLUMN; suggest expand-contract pattern.
394fn rule_r05_rename_column(table: &str, old: &str, new: &str) -> FixSuggestion {
395    FixSuggestion {
396        rule_id: "R05".to_string(),
397        title: format!("Use expand-contract pattern to rename '{old}' → '{new}' without downtime"),
398        explanation: format!(
399            "Renaming column '{old}' in '{table}' is a **breaking change** for every \
400             piece of application code, ORM model, stored procedure, view, and query \
401             that references the old column name. The expand-contract (aka \
402             parallel-change) pattern lets you rename a column while keeping both \
403             names alive during the transition window, giving you a zero-downtime \
404             path."
405        ),
406        fixed_sql: None,
407        migration_steps: Some(vec![
408            format!("-- Migration A: Add new column and sync data"),
409            format!("ALTER TABLE {table} ADD COLUMN {new} <same_type_as_{old}>;"),
410            format!("UPDATE {table} SET {new} = {old};"),
411            String::new(),
412            format!("-- Application deploy: Dual-write to both '{old}' and '{new}'"),
413            format!("--   (reads still come from '{old}')"),
414            String::new(),
415            format!("-- Application deploy: Switch reads to '{new}'"),
416            format!("--   (still write to both)"),
417            String::new(),
418            format!("-- Application deploy: Stop writing to '{old}'"),
419            String::new(),
420            format!("-- Migration B: Drop old column"),
421            format!("ALTER TABLE {table} DROP COLUMN {old};"),
422        ]),
423        severity: FixSeverity::Blocking,
424        docs_url: Some("https://martinfowler.com/bliki/ParallelChange.html".to_string()),
425        auto_fixable: false,
426    }
427}
428
429/// R06 — RENAME TABLE; suggest view-based transition.
430fn rule_r06_rename_table(old: &str, new: &str) -> FixSuggestion {
431    FixSuggestion {
432        rule_id: "R06".to_string(),
433        title: format!("Renaming table '{old}' → '{new}' breaks all downstream code instantly"),
434        explanation: format!(
435            "Renaming table '{old}' invalidates ALL queries, ORM models, foreign key \
436             constraints, views, triggers, and stored procedures that reference the old \
437             name. This is one of the most dangerous DDL operations. Use a transitional \
438             compatibility view (Option A) or the full expand-contract pattern (Option B) \
439             to provide a zero-downtime migration path."
440        ),
441        fixed_sql: None,
442        migration_steps: Some(vec![
443            format!("-- ── Option A: Rename + leave compatibility view ────────────────"),
444            format!("ALTER TABLE {old} RENAME TO {new};"),
445            format!("-- Create a view using the old name so existing queries still work:"),
446            format!("CREATE VIEW {old} AS SELECT * FROM {new};"),
447            format!("-- Remove the view after all app code has been updated to use '{new}'"),
448            String::new(),
449            format!("-- ── Option B: Full expand-contract ────────────────────────────"),
450            format!("-- Step 1: Create new table '{new}' with identical schema"),
451            format!("-- Step 2: Create triggers to sync writes from '{old}' → '{new}'"),
452            format!("-- Step 3: Back-fill '{new}' from '{old}' for historical rows"),
453            format!("-- Step 4: Deploy app to write to '{new}', read from both"),
454            format!("-- Step 5: Deploy app to read only from '{new}'"),
455            format!("-- Step 6: Drop triggers + old table '{old}'"),
456        ]),
457        severity: FixSeverity::Blocking,
458        docs_url: Some(
459            "https://braintreepayments.com/blog/safe-operations-for-high-volume-postgresql/"
460                .to_string(),
461        ),
462        auto_fixable: false,
463    }
464}
465
466/// R07 — ALTER COLUMN TYPE; advise shadow-column pattern.
467fn rule_r07_alter_column_type(
468    table: &str,
469    column: &str,
470    new_type: &str,
471    rows: u64,
472) -> FixSuggestion {
473    let rows_clause = if rows > 0 {
474        format!(" (~{} rows)", fmt_rows(rows))
475    } else {
476        String::new()
477    };
478
479    // Detect safe type conversions (metadata-only on PG9.2+)
480    let upper_type = new_type.to_uppercase();
481    let is_varchar_expansion = upper_type.starts_with("VARCHAR")
482        || upper_type.starts_with("CHARACTER VARYING")
483        || upper_type == "TEXT";
484    let is_numeric_precision_increase =
485        upper_type.starts_with("NUMERIC") || upper_type.starts_with("DECIMAL");
486
487    // If this is a potentially safe conversion, note it
488    let safe_conversion_note = if is_varchar_expansion {
489        " Note: Increasing VARCHAR length or converting to TEXT is metadata-only on PG9.2+ \
490         — no table rewrite required in that case."
491    } else if is_numeric_precision_increase {
492        " Note: Increasing NUMERIC precision is safe. Decreasing precision or scale requires \
493         a full table rewrite."
494    } else {
495        ""
496    };
497
498    FixSuggestion {
499        rule_id: "R07".to_string(),
500        title: format!(
501            "Type change on '{table}.{column}' triggers full table rewrite under ACCESS EXCLUSIVE lock"
502        ),
503        explanation: format!(
504            "Changing the type of column '{column}' in '{table}'{rows_clause} causes \
505             PostgreSQL to rewrite the entire table while holding an ACCESS EXCLUSIVE \
506             lock. All reads and writes are blocked for the entire duration. For \
507             large tables this can mean minutes of downtime.{safe_conversion_note} \
508             Use the shadow-column pattern to perform the type change online."
509        ),
510        fixed_sql: None,
511        migration_steps: Some(vec![
512            format!("-- Step 1: Add shadow column with new type"),
513            format!("ALTER TABLE {table} ADD COLUMN {column}_v2 {new_type};"),
514            String::new(),
515            format!("-- Step 2: Back-fill in batches (prevents long lock)"),
516            format!("-- Run in a loop until UPDATE returns 0 rows:"),
517            format!("WITH batch AS ("),
518            format!("  SELECT ctid FROM {table}"),
519            format!("  WHERE {column} IS NOT NULL AND {column}_v2 IS NULL"),
520            format!("  LIMIT 10000"),
521            format!(")"),
522            format!("UPDATE {table}"),
523            format!("  SET {column}_v2 = {column}::{new_type}"),
524            format!("WHERE ctid IN (SELECT ctid FROM batch);"),
525            String::new(),
526            format!("-- Step 3: Deploy app to write to both columns"),
527            String::new(),
528            format!("-- Step 4: Atomically swap column names"),
529            format!("ALTER TABLE {table}"),
530            format!("  RENAME COLUMN {column}    TO {column}_old;"),
531            format!("ALTER TABLE {table}"),
532            format!("  RENAME COLUMN {column}_v2 TO {column};"),
533            String::new(),
534            format!("-- Step 5: Drop old column after verifying app health"),
535            format!("ALTER TABLE {table} DROP COLUMN {column}_old;"),
536        ]),
537        severity: FixSeverity::Blocking,
538        docs_url: Some(
539            "https://www.postgresql.org/docs/current/sql-altertable.html".to_string(),
540        ),
541        auto_fixable: false,
542    }
543}
544
545/// R09 — ADD COLUMN with DEFAULT; PG10 and below rewrites the full table.
546/// On PG11+ this is a metadata-only operation (no table rewrite).
547/// We always emit an info/warning so teams know which PG version changes the behaviour.
548fn rule_r09_add_column_default(
549    table: &str,
550    column: &str,
551    data_type: &str,
552    rows: u64,
553) -> FixSuggestion {
554    let rows_note = if rows > 0 {
555        format!(" (~{} rows)", fmt_rows(rows))
556    } else {
557        String::new()
558    };
559    FixSuggestion {
560        rule_id: "R09".to_string(),
561        title: format!(
562            "ADD COLUMN '{column}' WITH DEFAULT: safe on PG11+, table-rewrite on PG10 and below"
563        ),
564        explanation: format!(
565            "PostgreSQL 11 introduced the ability to add a column with a constant DEFAULT \
566             value without rewriting the table — the default is stored in the system catalog \
567             and applied on-the-fly at query time. On PostgreSQL 10 and below, adding a column \
568             with ANY default requires a full table rewrite{rows_note} holding ACCESS EXCLUSIVE \
569             lock. Note: Volatile defaults like now() or random() still require a table rewrite \
570             even on PG11+. Always run 'SELECT version()' to confirm your PostgreSQL version. \
571             Pass --pg-version to SchemaRisk to get accurate risk scores."
572        ),
573        fixed_sql: None,
574        migration_steps: Some(vec![
575            format!("-- If you are on PostgreSQL 11+ with a CONSTANT default:"),
576            format!("-- This is already safe — no changes needed."),
577            format!("ALTER TABLE {table}"),
578            format!("  ADD COLUMN {column} {data_type} DEFAULT <your_constant_value>;"),
579            String::new(),
580            format!(
581                "-- ⚠ WARNING: Volatile defaults (now(), random(), etc.) still rewrite the table!"
582            ),
583            String::new(),
584            format!("-- ── If you are on PostgreSQL 10 or below ────────────────────"),
585            format!("-- Step 1: Add column as nullable (no default, no rewrite)"),
586            format!("ALTER TABLE {table} ADD COLUMN {column} {data_type};"),
587            String::new(),
588            format!("-- Step 2: Back-fill in batches during low-traffic window"),
589            format!("-- Run in a loop until 0 rows are updated:"),
590            format!("WITH batch AS ("),
591            format!("  SELECT ctid FROM {table}"),
592            format!("  WHERE {column} IS NULL"),
593            format!("  LIMIT 10000"),
594            format!(")"),
595            format!("UPDATE {table} SET {column} = <your_value>"),
596            format!("WHERE ctid IN (SELECT ctid FROM batch);"),
597            String::new(),
598            format!("-- Step 3: Set NOT NULL constraint after back-fill is complete"),
599            format!("ALTER TABLE {table} ALTER COLUMN {column} SET NOT NULL;"),
600        ]),
601        severity: FixSeverity::Info,
602        docs_url: Some(
603            "https://www.postgresql.org/docs/11/release-11.html#id-1.11.6.14.4".to_string(),
604        ),
605        auto_fixable: false,
606    }
607}
608
/// R08 — Long ACCESS EXCLUSIVE lock; suggest lock_timeout + pg_repack.
///
/// * `description` — human-readable operation text (e.g. "DROP TABLE x");
///   truncated to 80 characters for the explanation.
/// * `est_secs` — estimated lock hold time from `estimate_lock_secs`.
fn rule_r08_long_lock(description: &str, est_secs: u64) -> FixSuggestion {
    FixSuggestion {
        rule_id: "R08".to_string(),
        title: format!("ACCESS EXCLUSIVE lock held for ~{est_secs}s — protect with lock_timeout"),
        explanation: format!(
            "The operation '{}' acquires ACCESS EXCLUSIVE lock for an estimated \
             ~{est_secs} seconds. During this window every query, transaction, and \
             connection waiting to access the table is queued. A single long-running \
             transaction before the migration can turn a 30-second lock into minutes \
             of application downtime. Set lock_timeout to prevent lock pile-up.",
            shorten_desc(description, 80)
        ),
        fixed_sql: None,
        migration_steps: Some(vec![
            "-- Wrap your migration in a lock_timeout guard:".to_string(),
            "BEGIN;".to_string(),
            "  SET lock_timeout = '3s';        -- abort if lock is not acquired in 3s".to_string(),
            "  SET statement_timeout = '120s'; -- abort if statement runs > 2 min".to_string(),
            String::new(),
            "  -- YOUR MIGRATION HERE".to_string(),
            String::new(),
            "COMMIT;".to_string(),
            String::new(),
            "-- For tables > 1GB, consider pg_repack for zero-downtime online rewrites:"
                .to_string(),
            "-- https://github.com/reorg/pg_repack".to_string(),
            "-- pg_repack --dbname=<db_url> --table=<table_name>".to_string(),
        ]),
        severity: FixSeverity::Warning,
        docs_url: Some("https://github.com/reorg/pg_repack".to_string()),
        auto_fixable: false,
    }
}
643
644// ─────────────────────────────────────────────
645// SQL rewriting
646// ─────────────────────────────────────────────
647
/// Rewrite raw SQL: insert `CONCURRENTLY` after every `CREATE [UNIQUE] INDEX`
/// that does not already have it.
///
/// Case of the original text is preserved (only ` CONCURRENTLY ` is added);
/// all other lines pass through untouched, so the rewrite is idempotent.
pub fn rewrite_index_concurrent(sql: &str) -> String {
    /// Insert ` CONCURRENTLY ` right after `keyword` if it occurs in `upper`;
    /// returns `None` when the keyword is absent from the line.
    fn inject_after(line: &str, upper: &str, keyword: &str) -> Option<String> {
        let start = upper.find(keyword)?;
        let (head, tail) = line.split_at(start + keyword.len());
        Some(format!("{head} CONCURRENTLY {}", tail.trim_start()))
    }

    let rewritten: Vec<String> = sql
        .lines()
        .map(|line| {
            let upper = line.to_uppercase();
            if upper.contains("CONCURRENTLY") {
                // Already concurrent — leave alone.
                return line.to_string();
            }
            // Try the longer keyword first so "CREATE UNIQUE INDEX" is not
            // mangled by the plain "CREATE INDEX" match.
            inject_after(line, &upper, "CREATE UNIQUE INDEX")
                .or_else(|| inject_after(line, &upper, "CREATE INDEX"))
                .unwrap_or_else(|| line.to_string())
        })
        .collect();
    rewritten.join("\n")
}
682
683// ─────────────────────────────────────────────
684// Formatting helpers
685// ─────────────────────────────────────────────
686
/// Format a row count as a human-readable string (e.g. "187.2M").
///
/// Thousands get a whole-number "K" suffix; millions and billions keep one
/// decimal place; anything below 1 000 is printed verbatim.
fn fmt_rows(n: u64) -> String {
    const K: u64 = 1_000;
    const M: u64 = 1_000_000;
    const B: u64 = 1_000_000_000;
    match n {
        _ if n >= B => format!("{:.1}B", n as f64 / B as f64),
        _ if n >= M => format!("{:.1}M", n as f64 / M as f64),
        _ if n >= K => format!("{:.0}K", n as f64 / K as f64),
        _ => n.to_string(),
    }
}
699
/// Rough estimate: seconds an ACCESS EXCLUSIVE lock will be held, based on
/// estimated row count at ~500k rows/second. Never reports less than 1s.
fn estimate_lock_secs(rows: u64) -> u64 {
    (rows / 500_000).max(1)
}
709
/// Truncate a description string to at most `max` bytes.
///
/// The cut is moved back to the nearest UTF-8 character boundary so that
/// multi-byte text can never cause a slice panic. No ellipsis is appended —
/// the return type is a borrowed slice, so callers add their own if needed.
fn shorten_desc(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Back off until we land on a char boundary (index 0 always is one,
    // so this loop terminates).
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
718
719// ─────────────────────────────────────────────
720// Unit tests
721// ─────────────────────────────────────────────
722
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser;

    // R01 fires for a plain CREATE INDEX and is marked auto-fixable.
    #[test]
    fn test_r01_triggers_for_non_concurrent_index() {
        let sql = "CREATE INDEX idx_users_email ON users(email);";
        let stmts = parser::parse(sql).expect("parse");
        let fixes = suggest_fixes(&stmts, &HashMap::new());
        assert!(fixes.iter().any(|f| f.rule_id == "R01"));
        assert!(fixes.iter().any(|f| f.auto_fixable));
    }

    // CONCURRENTLY already present — R01 must stay silent.
    #[test]
    fn test_r01_skipped_for_concurrent_index() {
        let sql = "CREATE INDEX CONCURRENTLY idx_users_email ON users(email);";
        let stmts = parser::parse(sql).expect("parse");
        let fixes = suggest_fixes(&stmts, &HashMap::new());
        assert!(fixes.iter().all(|f| f.rule_id != "R01"));
    }

    // NOT NULL without DEFAULT triggers R02.
    #[test]
    fn test_r02_triggers_for_not_null_no_default() {
        let sql = "ALTER TABLE users ADD COLUMN verified BOOLEAN NOT NULL;";
        let stmts = parser::parse(sql).expect("parse");
        let fixes = suggest_fixes(&stmts, &HashMap::new());
        assert!(fixes.iter().any(|f| f.rule_id == "R02"));
    }

    // The raw-SQL rewriter inserts CONCURRENTLY into plain CREATE INDEX.
    #[test]
    fn test_rewrite_concurrent() {
        let sql = "CREATE INDEX idx_a ON t(col);";
        let result = rewrite_index_concurrent(sql);
        assert!(result.contains("CONCURRENTLY"), "got: {result}");
    }

    // Re-running the rewriter must not duplicate the keyword.
    #[test]
    fn test_rewrite_concurrent_idempotent() {
        let sql = "CREATE INDEX CONCURRENTLY idx_a ON t(col);";
        let result = rewrite_index_concurrent(sql);
        // Should not double-insert CONCURRENTLY
        let count = result.matches("CONCURRENTLY").count();
        assert_eq!(count, 1, "got: {result}");
    }
}