kaccy_db/
table_stats.rs

1//! Table statistics and size utilities
2//!
3//! This module provides utilities for retrieving table statistics, sizes,
4//! and usage information from PostgreSQL.
5
6use crate::error::Result;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use sqlx::PgPool;
10
11/// Comprehensive table statistics
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct TableStats {
14    /// Schema name
15    pub schema: String,
16    /// Table name
17    pub table_name: String,
18    /// Total table size including indexes (bytes)
19    pub total_size_bytes: i64,
20    /// Table size excluding indexes (bytes)
21    pub table_size_bytes: i64,
22    /// Index size (bytes)
23    pub index_size_bytes: i64,
24    /// Approximate row count
25    pub row_count: i64,
26    /// Number of sequential scans
27    pub seq_scans: i64,
28    /// Number of index scans
29    pub index_scans: i64,
30    /// Number of live tuples
31    pub live_tuples: i64,
32    /// Number of dead tuples
33    pub dead_tuples: i64,
34    /// Last vacuum time
35    pub last_vacuum: Option<DateTime<Utc>>,
36    /// Last analyze time
37    pub last_analyze: Option<DateTime<Utc>>,
38}
39
40/// Index usage statistics
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct IndexStats {
43    /// Schema name
44    pub schema: String,
45    /// Table name
46    pub table_name: String,
47    /// Index name
48    pub index_name: String,
49    /// Index size in bytes
50    pub size_bytes: i64,
51    /// Number of index scans
52    pub scans: i64,
53    /// Number of tuples read by index scans
54    pub tuples_read: i64,
55    /// Number of tuples fetched by index scans
56    pub tuples_fetched: i64,
57}
58
59/// Database size summary
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct DatabaseSizeInfo {
62    /// Database name
63    pub database_name: String,
64    /// Total database size in bytes
65    pub size_bytes: i64,
66    /// Human-readable size
67    pub size_formatted: String,
68    /// Number of tables
69    pub table_count: i64,
70    /// Number of indexes
71    pub index_count: i64,
72}
73
74/// Get comprehensive statistics for a specific table
75pub async fn get_table_stats(pool: &PgPool, table_name: &str) -> Result<Option<TableStats>> {
76    let stats = sqlx::query_as::<
77        _,
78        (
79            String,
80            String,
81            i64,
82            i64,
83            i64,
84            i64,
85            i64,
86            i64,
87            i64,
88            i64,
89            Option<DateTime<Utc>>,
90            Option<DateTime<Utc>>,
91        ),
92    >(
93        r#"
94        SELECT
95            schemaname,
96            tablename,
97            pg_total_relation_size(schemaname || '.' || tablename) as total_size,
98            pg_relation_size(schemaname || '.' || tablename) as table_size,
99            pg_total_relation_size(schemaname || '.' || tablename) -
100                pg_relation_size(schemaname || '.' || tablename) as index_size,
101            n_tup_ins + n_tup_upd + n_tup_del as row_count,
102            seq_scan,
103            idx_scan,
104            n_live_tup,
105            n_dead_tup,
106            last_vacuum,
107            last_analyze
108        FROM pg_stat_user_tables
109        WHERE tablename = $1
110        "#,
111    )
112    .bind(table_name)
113    .fetch_optional(pool)
114    .await?;
115
116    Ok(stats.map(|s| TableStats {
117        schema: s.0,
118        table_name: s.1,
119        total_size_bytes: s.2,
120        table_size_bytes: s.3,
121        index_size_bytes: s.4,
122        row_count: s.5,
123        seq_scans: s.6,
124        index_scans: s.7,
125        live_tuples: s.8,
126        dead_tuples: s.9,
127        last_vacuum: s.10,
128        last_analyze: s.11,
129    }))
130}
131
132/// Get statistics for all tables, ordered by size
133pub async fn get_all_table_stats(pool: &PgPool) -> Result<Vec<TableStats>> {
134    let stats = sqlx::query_as::<
135        _,
136        (
137            String,
138            String,
139            i64,
140            i64,
141            i64,
142            i64,
143            i64,
144            i64,
145            i64,
146            i64,
147            Option<DateTime<Utc>>,
148            Option<DateTime<Utc>>,
149        ),
150    >(
151        r#"
152        SELECT
153            schemaname,
154            tablename,
155            pg_total_relation_size(schemaname || '.' || tablename) as total_size,
156            pg_relation_size(schemaname || '.' || tablename) as table_size,
157            pg_total_relation_size(schemaname || '.' || tablename) -
158                pg_relation_size(schemaname || '.' || tablename) as index_size,
159            n_tup_ins + n_tup_upd + n_tup_del as row_count,
160            seq_scan,
161            idx_scan,
162            n_live_tup,
163            n_dead_tup,
164            last_vacuum,
165            last_analyze
166        FROM pg_stat_user_tables
167        ORDER BY total_size DESC
168        "#,
169    )
170    .fetch_all(pool)
171    .await?;
172
173    Ok(stats
174        .into_iter()
175        .map(|s| TableStats {
176            schema: s.0,
177            table_name: s.1,
178            total_size_bytes: s.2,
179            table_size_bytes: s.3,
180            index_size_bytes: s.4,
181            row_count: s.5,
182            seq_scans: s.6,
183            index_scans: s.7,
184            live_tuples: s.8,
185            dead_tuples: s.9,
186            last_vacuum: s.10,
187            last_analyze: s.11,
188        })
189        .collect())
190}
191
192/// Get the largest tables in the database
193pub async fn get_largest_tables(pool: &PgPool, limit: i32) -> Result<Vec<TableStats>> {
194    let stats = get_all_table_stats(pool).await?;
195    Ok(stats.into_iter().take(limit as usize).collect())
196}
197
198/// Get index statistics for a table
199pub async fn get_table_index_stats(pool: &PgPool, table_name: &str) -> Result<Vec<IndexStats>> {
200    let stats = sqlx::query_as::<_, (String, String, String, i64, i64, i64, i64)>(
201        r#"
202        SELECT
203            schemaname,
204            tablename,
205            indexname,
206            pg_relation_size(schemaname || '.' || indexname) as size,
207            idx_scan,
208            idx_tup_read,
209            idx_tup_fetch
210        FROM pg_stat_user_indexes
211        WHERE tablename = $1
212        ORDER BY idx_scan DESC
213        "#,
214    )
215    .bind(table_name)
216    .fetch_all(pool)
217    .await?;
218
219    Ok(stats
220        .into_iter()
221        .map(|s| IndexStats {
222            schema: s.0,
223            table_name: s.1,
224            index_name: s.2,
225            size_bytes: s.3,
226            scans: s.4,
227            tuples_read: s.5,
228            tuples_fetched: s.6,
229        })
230        .collect())
231}
232
233/// Get unused indexes (indexes with zero scans)
234pub async fn get_unused_indexes(pool: &PgPool) -> Result<Vec<IndexStats>> {
235    let stats = sqlx::query_as::<_, (String, String, String, i64, i64, i64, i64)>(
236        r#"
237        SELECT
238            schemaname,
239            tablename,
240            indexname,
241            pg_relation_size(schemaname || '.' || indexname) as size,
242            idx_scan,
243            idx_tup_read,
244            idx_tup_fetch
245        FROM pg_stat_user_indexes
246        WHERE idx_scan = 0
247        AND indexname NOT LIKE '%_pkey'
248        ORDER BY size DESC
249        "#,
250    )
251    .fetch_all(pool)
252    .await?;
253
254    Ok(stats
255        .into_iter()
256        .map(|s| IndexStats {
257            schema: s.0,
258            table_name: s.1,
259            index_name: s.2,
260            size_bytes: s.3,
261            scans: s.4,
262            tuples_read: s.5,
263            tuples_fetched: s.6,
264        })
265        .collect())
266}
267
268/// Get database size information
269pub async fn get_database_size(pool: &PgPool) -> Result<DatabaseSizeInfo> {
270    let (db_name, size_bytes): (String, i64) = sqlx::query_as(
271        r#"
272        SELECT
273            current_database(),
274            pg_database_size(current_database())
275        "#,
276    )
277    .fetch_one(pool)
278    .await?;
279
280    let table_count: i64 = sqlx::query_scalar(
281        r#"
282        SELECT COUNT(*)
283        FROM pg_tables
284        WHERE schemaname = 'public'
285        "#,
286    )
287    .fetch_one(pool)
288    .await?;
289
290    let index_count: i64 = sqlx::query_scalar(
291        r#"
292        SELECT COUNT(*)
293        FROM pg_indexes
294        WHERE schemaname = 'public'
295        "#,
296    )
297    .fetch_one(pool)
298    .await?;
299
300    Ok(DatabaseSizeInfo {
301        database_name: db_name,
302        size_bytes,
303        size_formatted: crate::helpers::format_bytes(size_bytes as u64),
304        table_count,
305        index_count,
306    })
307}
308
309/// Get tables with highest sequential scan ratio
310///
311/// These tables might benefit from adding indexes.
312pub async fn get_high_seq_scan_tables(pool: &PgPool, min_scans: i64) -> Result<Vec<SeqScanInfo>> {
313    let tables = sqlx::query_as::<_, (String, i64, i64, f64)>(
314        r#"
315        SELECT
316            tablename,
317            seq_scan,
318            idx_scan,
319            CASE
320                WHEN (seq_scan + idx_scan) > 0
321                THEN (seq_scan::float / (seq_scan + idx_scan)::float) * 100
322                ELSE 0
323            END as seq_scan_percent
324        FROM pg_stat_user_tables
325        WHERE seq_scan > $1
326        ORDER BY seq_scan_percent DESC, seq_scan DESC
327        "#,
328    )
329    .bind(min_scans)
330    .fetch_all(pool)
331    .await?;
332
333    Ok(tables
334        .into_iter()
335        .map(|t| SeqScanInfo {
336            table_name: t.0,
337            seq_scans: t.1,
338            index_scans: t.2,
339            seq_scan_percent: t.3,
340        })
341        .collect())
342}
343
344/// Sequential scan information
345#[derive(Debug, Clone, Serialize, Deserialize)]
346pub struct SeqScanInfo {
347    /// Table name
348    pub table_name: String,
349    /// Number of sequential scans
350    pub seq_scans: i64,
351    /// Number of index scans
352    pub index_scans: i64,
353    /// Percentage of scans that were sequential
354    pub seq_scan_percent: f64,
355}
356
357/// Get cache hit ratio for tables
358pub async fn get_table_cache_hit_ratio(pool: &PgPool) -> Result<Vec<CacheHitInfo>> {
359    let tables = sqlx::query_as::<_, (String, i64, i64, f64)>(
360        r#"
361        SELECT
362            tablename,
363            heap_blks_read,
364            heap_blks_hit,
365            CASE
366                WHEN (heap_blks_read + heap_blks_hit) > 0
367                THEN (heap_blks_hit::float / (heap_blks_read + heap_blks_hit)::float) * 100
368                ELSE 0
369            END as cache_hit_ratio
370        FROM pg_statio_user_tables
371        WHERE (heap_blks_read + heap_blks_hit) > 0
372        ORDER BY cache_hit_ratio ASC
373        "#,
374    )
375    .fetch_all(pool)
376    .await?;
377
378    Ok(tables
379        .into_iter()
380        .map(|t| CacheHitInfo {
381            table_name: t.0,
382            blocks_read: t.1,
383            blocks_hit: t.2,
384            hit_ratio_percent: t.3,
385        })
386        .collect())
387}
388
389/// Cache hit ratio information
390#[derive(Debug, Clone, Serialize, Deserialize)]
391pub struct CacheHitInfo {
392    /// Table name
393    pub table_name: String,
394    /// Number of blocks read from disk
395    pub blocks_read: i64,
396    /// Number of blocks found in cache
397    pub blocks_hit: i64,
398    /// Cache hit ratio percentage
399    pub hit_ratio_percent: f64,
400}
401
402#[cfg(test)]
403mod tests {
404    use super::*;
405
406    #[test]
407    fn test_table_stats_structure() {
408        let stats = TableStats {
409            schema: "public".to_string(),
410            table_name: "users".to_string(),
411            total_size_bytes: 1_048_576,
412            table_size_bytes: 524_288,
413            index_size_bytes: 524_288,
414            row_count: 10000,
415            seq_scans: 100,
416            index_scans: 1000,
417            live_tuples: 9500,
418            dead_tuples: 500,
419            last_vacuum: None,
420            last_analyze: None,
421        };
422
423        assert_eq!(stats.table_name, "users");
424        assert_eq!(stats.row_count, 10000);
425    }
426
427    #[test]
428    fn test_index_stats_structure() {
429        let stats = IndexStats {
430            schema: "public".to_string(),
431            table_name: "users".to_string(),
432            index_name: "users_email_idx".to_string(),
433            size_bytes: 262_144,
434            scans: 500,
435            tuples_read: 5000,
436            tuples_fetched: 4500,
437        };
438
439        assert_eq!(stats.index_name, "users_email_idx");
440        assert_eq!(stats.scans, 500);
441    }
442
443    #[test]
444    fn test_database_size_info_structure() {
445        let info = DatabaseSizeInfo {
446            database_name: "mydb".to_string(),
447            size_bytes: 10_485_760,
448            size_formatted: "10.00 MB".to_string(),
449            table_count: 15,
450            index_count: 25,
451        };
452
453        assert_eq!(info.database_name, "mydb");
454        assert_eq!(info.table_count, 15);
455    }
456
457    #[test]
458    fn test_seq_scan_info_structure() {
459        let info = SeqScanInfo {
460            table_name: "orders".to_string(),
461            seq_scans: 1000,
462            index_scans: 100,
463            seq_scan_percent: 90.9,
464        };
465
466        assert_eq!(info.seq_scans, 1000);
467        assert!(info.seq_scan_percent > 90.0);
468    }
469
470    #[test]
471    fn test_cache_hit_info_structure() {
472        let info = CacheHitInfo {
473            table_name: "products".to_string(),
474            blocks_read: 100,
475            blocks_hit: 900,
476            hit_ratio_percent: 90.0,
477        };
478
479        assert_eq!(info.hit_ratio_percent, 90.0);
480    }
481
482    #[test]
483    fn test_table_stats_serialization() {
484        let stats = TableStats {
485            schema: "public".to_string(),
486            table_name: "test".to_string(),
487            total_size_bytes: 1024,
488            table_size_bytes: 512,
489            index_size_bytes: 512,
490            row_count: 100,
491            seq_scans: 10,
492            index_scans: 50,
493            live_tuples: 95,
494            dead_tuples: 5,
495            last_vacuum: None,
496            last_analyze: None,
497        };
498
499        let json = serde_json::to_string(&stats).unwrap();
500        let deserialized: TableStats = serde_json::from_str(&json).unwrap();
501
502        assert_eq!(deserialized.table_name, stats.table_name);
503        assert_eq!(deserialized.row_count, stats.row_count);
504    }
505}