Skip to main content

pgdrift_db/
discovery.rs

1use serde::Serialize;
2use sqlx::PgPool;
3
4/// Represents a JSOBN column in discovered in the DB
5#[derive(Debug, Clone, Serialize)]
6pub struct JsonbColumn {
7    pub schema: String,
8    pub table: String,
9    pub column: String,
10    pub estimated_rows: Option<i64>,
11}
12
13impl JsonbColumn {
14    /// Get the fully qualified column name
15    pub fn full_name(&self) -> String {
16        format!("{}.{}.{}", self.schema, self.table, self.column)
17    }
18}
19
20/// Discover all JSONB columns in the DB
21///
22/// Queries information_schema to find all columns with the type 'Jsonb',
23/// excluding system schemas (pg_catalog, information_schema).
24/// Also, fetch estimated row counts from pg_stat_user_tables
25pub async fn discover_jsonb_columns(pool: &PgPool) -> Result<Vec<JsonbColumn>, sqlx::Error> {
26    let columns = sqlx::query_as::<_, (String, String, String, Option<i64>)>(
27        r#"
28          SELECT
29              c.table_schema,
30              c.table_name,
31              c.column_name,
32              s.n_live_tup as estimated_rows
33          FROM information_schema.columns c
34          LEFT JOIN pg_stat_user_tables s
35              ON s.schemaname = c.table_schema
36              AND s.relname = c.table_name
37          WHERE c.data_type = 'jsonb'
38              AND c.table_schema NOT IN ('pg_catalog', 'information_schema')
39          ORDER BY c.table_schema, c.table_name, c.column_name
40          "#,
41    )
42    .fetch_all(pool)
43    .await?
44    .into_iter()
45    .map(|(schema, table, column, estimated_rows)| JsonbColumn {
46        schema,
47        table,
48        column,
49        estimated_rows,
50    })
51    .collect();
52
53    Ok(columns)
54}
55
56/// Get exact row count for a specific table
57///
58/// Executes COUNT(*) query on the specified table.
59/// Note: This can be slow on large tables - use estimated_rows from
60/// discover_jsonb_columns for quick estimates.
61pub async fn get_row_count(pool: &PgPool, schema: &str, table: &str) -> Result<i64, sqlx::Error> {
62    let count: i64 = sqlx::query_scalar(&format!(
63        "SELECT COUNT(*) FROM {}.{}",
64        quote_identifier(schema),
65        quote_identifier(table)
66    ))
67    .fetch_one(pool)
68    .await?;
69
70    Ok(count)
71}
72
73/// Quote a postgresql identifier (schema/table/column name) to prevent sql injection
74fn quote_identifier(identifier: &str) -> String {
75    format!("\"{}\"", identifier.replace("\"", "\"\""))
76}