Skip to main content

djogi_cli/
analyze.rs

1//! Partition / vacuum analysis for adopter Postgres tables.
2//!
3//! `djogi analyze` (T10) inspects `pg_stat_user_tables` and (when the
4//! extension is installed) `pg_partman` metadata to surface vacuum and
5//! partitioning recommendations to operators. The recommendation logic
6//! ([`recommend`]) is pure — no DB, no I/O, no global state — so it
7//! can be unit-tested against synthetic [`TableHealth`] inputs without
8//! a live database. The live-DB query path lives in
9//! [`fetch_table_health`]; the CLI dispatch entry point is [`run`].
10//!
11//! # Why a pure substrate
12//!
13//! `recommend()` is exposed as a free function taking only
14//! `&TableHealth` plus scalar threshold args. That shape is
15//! deliberately deterministic — the same inputs always produce the
16//! exact same output bytes. Two consequences fall out:
17//!
18//! 1. **Byte-stable JSON.** When T10.2 serialises a sorted
19//!    `Vec<(table_name, Recommendation)>` to `serde_json`, the result
20//!    is reproducible across runs / hosts / Postgres restarts. CI
21//!    dashboards that diff yesterday's `analyze --format json` output
22//!    against today's see only real changes, never iteration-order
23//!    churn.
24//! 2. **Trivial unit-testability.** No `tokio` runtime, no fixture DB,
25//!    no temp dirs — every recommendation rule is exercised in-process
26//!    against hand-built `TableHealth` values.
27//!
28//! # Threshold rationale
29//!
30//! Both thresholds are runtime arguments rather than constants because
31//! healthy bloat / partition-row ceilings vary per workload. The
32//! defaults chosen by the CLI (`0.2` and `10_000_000`) are conservative
33//! middle-of-the-road values; OLTP-heavy tables typically tighten the
34//! vacuum threshold, while warehouse-style tables loosen the partition
35//! row count. Adopters override on the command line without recompiling.
36//!
37//! # Spec
38//!
39//! `docs/superpowers/plans/granular-phase8/cluster-8epsilon-granular.md`
40//! §T10.1.
41
42// T10.2 wires the substrate to the live-DB query path + CLI
43// dispatch. Every type and helper introduced in T10.1 is now
44// referenced from `fetch_table_health` / `run` / the renderers; the
45// file-scoped `#![allow(dead_code)]` from T10.1 is therefore gone.
46
47use std::io::Write;
48
49use serde::Serialize;
50use tokio_postgres::error::SqlState;
51
52use djogi::__bypass::RawAccessExt as _;
53use djogi::DjogiError;
54use djogi::config::DjogiConfig;
55use djogi::context::DjogiContext;
56use djogi::pg::pool::DjogiPool;
57
58/// Snapshot of a single table's vacuum / partition health.
59///
60/// Field provenance (per T10.2's planned query):
61///
62/// - `table_name` — `pg_stat_user_tables.relname`
63/// - `n_live_tup`, `n_dead_tup` — `pg_stat_user_tables` columns of the
64///   same name; Postgres-maintained per-row visibility counters.
65/// - `last_analyze` — `pg_stat_user_tables.last_analyze`; `None` when
66///   the table has never been analysed (e.g. freshly created).
67/// - `partition_count` — `0` for plain tables, `>= 1` for partitioned
68///   parents (sourced via `pg_partitioned_table` join, with a
69///   `pg_partman` fallback when the extension is installed).
70///
71/// `last_analyze` is intentionally `time::OffsetDateTime`, not
72/// `chrono::DateTime` — djogi forbids `chrono` workspace-wide
73/// (CLAUDE.md "Dependencies excluded").
74#[derive(Debug, Clone, Serialize)]
75pub struct TableHealth {
76    pub table_name: String,
77    pub n_live_tup: i64,
78    pub n_dead_tup: i64,
79    pub last_analyze: Option<time::OffsetDateTime>,
80    pub partition_count: i32,
81}
82
83/// Recommendation produced by [`recommend`] for a single table.
84///
85/// # Precedence
86///
87/// When multiple rules would fire, [`recommend`] returns the
88/// highest-priority match per this strict ordering (highest first):
89///
90/// 1. [`Recommendation::VacuumNeeded`] — bloat dominates everything;
91///    autovacuum lag is the most operationally urgent signal because
92///    dead tuples block index health and inflate disk usage.
93/// 2. [`Recommendation::PartitionRecommended`] — an unpartitioned table
94///    has crossed the row-count threshold; partitioning is structural
95///    work that should land before the table grows further.
96/// 3. [`Recommendation::PartitionCountIncrease`] — partitions exist
97///    but average row count per partition exceeds the threshold;
98///    expanding the partition count is incremental tuning.
99/// 4. [`Recommendation::Healthy`] — no rule fires.
100///
101/// # JSON shape
102///
103/// The `#[serde(tag = "kind", rename_all = "snake_case")]` attribute
104/// produces internally-tagged JSON like
105/// `{"kind":"vacuum_needed","dead_tup_ratio":0.42}`. T10.2's
106/// `--format json` path serialises a sorted vector of
107/// `{table, recommendation}` pairs; the snake_case tag keeps the
108/// machine-readable output ergonomic for shell scripts and dashboards.
109#[derive(Debug, Clone, PartialEq, Serialize)]
110#[serde(tag = "kind", rename_all = "snake_case")]
111pub enum Recommendation {
112    /// Dead-tuple ratio exceeded `threshold_vacuum`; operator should
113    /// run `VACUUM` (or tune autovacuum).
114    VacuumNeeded {
115        /// `n_dead_tup / (n_live_tup + n_dead_tup)`. Always strictly
116        /// greater than the threshold that triggered the variant.
117        dead_tup_ratio: f64,
118    },
119    /// Unpartitioned table whose live row count exceeds
120    /// `threshold_partition_rows`; operator should partition.
121    PartitionRecommended {
122        /// Human-readable explanation including the row count and the
123        /// threshold that fired the rule. Stable string format so
124        /// `--format json` consumers can grep for substrings (`"not
125        /// partitioned"`, `"threshold:"`).
126        reason: String,
127    },
128    /// Partitioned table whose average rows-per-partition exceeds
129    /// `threshold_partition_rows`; operator should expand the partition
130    /// count.
131    PartitionCountIncrease {
132        /// Current partition count.
133        current: i32,
134        /// Suggested partition count — currently a simple doubling.
135        /// Bounded by `i32::saturating_mul` so pathological inputs
136        /// (e.g. 1.5B partitions) cap at `i32::MAX` rather than
137        /// overflowing.
138        suggested: i32,
139    },
140    /// No recommendation — table is within all thresholds.
141    Healthy,
142}
143
144/// Pure recommendation function for a single table.
145///
146/// # Determinism
147///
148/// `recommend` takes only the borrowed `TableHealth` and two scalar
149/// thresholds. It performs no I/O, reads no globals, allocates only
150/// the `String` inside `PartitionRecommended::reason` (when that arm
151/// fires), and traverses no unordered collections. Repeated invocation
152/// on byte-identical inputs returns byte-identical outputs — the
153/// `recommend_is_deterministic` test asserts this with 100 repetitions.
154///
155/// # Threshold semantics
156///
157/// - `threshold_vacuum`: dead-tuple ratio strictly above which
158///   [`Recommendation::VacuumNeeded`] fires. Typical: `0.2` (20% bloat).
159///   Higher values mean the operator tolerates more bloat before
160///   flagging.
161/// - `threshold_partition_rows`: live row count strictly above which
162///   an unpartitioned table triggers [`Recommendation::PartitionRecommended`].
163///   Typical: `10_000_000`. The same threshold is reused for the
164///   per-partition row average that drives
165///   [`Recommendation::PartitionCountIncrease`].
166///
167/// # Edge cases
168///
169/// - Empty table (`n_live_tup == 0 && n_dead_tup == 0`): vacuum check
170///   is short-circuited (division-by-zero guard). Partition checks
171///   still run but neither fires for an empty table.
172/// - `partition_count == 0`: treated as "not partitioned" — only the
173///   `PartitionRecommended` rule can fire.
174/// - `partition_count >= 1` but row count below threshold: falls
175///   through to `Healthy`.
176///
177/// # See also
178///
179/// [`Recommendation`] for the precedence ordering.
180pub fn recommend(
181    health: &TableHealth,
182    threshold_vacuum: f64,
183    threshold_partition_rows: i64,
184) -> Recommendation {
185    // 1. VacuumNeeded — highest priority. Skipped on empty tables to
186    //    avoid 0/0; an empty table cannot be bloated by definition.
187    //    `saturating_add` caps at `i64::MAX` rather than panicking
188    //    (debug) or wrapping (release) when both counters approach
189    //    `i64::MAX` — pathological stats values still produce a valid
190    //    ratio in `[0.0, 1.0]`.
191    let total_tup = health.n_live_tup.saturating_add(health.n_dead_tup);
192    if total_tup > 0 {
193        let ratio = health.n_dead_tup as f64 / total_tup as f64;
194        if ratio > threshold_vacuum {
195            return Recommendation::VacuumNeeded {
196                dead_tup_ratio: ratio,
197            };
198        }
199    }
200
201    // 2. PartitionRecommended — unpartitioned table over the row
202    //    threshold. `partition_count == 0` is the unpartitioned signal.
203    if health.partition_count == 0 && health.n_live_tup > threshold_partition_rows {
204        return Recommendation::PartitionRecommended {
205            reason: format!(
206                "table has {} live rows but is not partitioned (threshold: {})",
207                health.n_live_tup, threshold_partition_rows
208            ),
209        };
210    }
211
212    // 3. PartitionCountIncrease — partitioned but undersized partitions.
213    //    Average is integer-divided; precision is irrelevant since the
214    //    threshold gate is also an integer comparison.
215    if health.partition_count > 0 {
216        let avg_per_partition = health.n_live_tup / health.partition_count as i64;
217        if avg_per_partition > threshold_partition_rows {
218            return Recommendation::PartitionCountIncrease {
219                current: health.partition_count,
220                // Saturating multiplication caps at `i32::MAX` rather
221                // than overflowing — pathological partition counts
222                // (e.g. > i32::MAX/2) still produce a valid suggestion.
223                suggested: health.partition_count.saturating_mul(2),
224            };
225        }
226    }
227
228    // 4. Healthy — no rule fired.
229    Recommendation::Healthy
230}
231
232/// Errors surfaced by [`run`] / [`fetch_table_health`].
233///
234/// Each variant carries operator-actionable context — the goal is that
235/// an `eprintln!("djogi analyze: {e}")` line is enough to diagnose
236/// without grepping source. Mirrors `verify::VerifyError`'s shape.
237#[derive(Debug)]
238pub enum AnalyzeError {
239    /// Loading `Djogi.toml` (and its env overlays) failed.
240    Config(String),
241    /// Could not connect to the application database. URL is included
242    /// for diagnostics; the underlying `DjogiError` is rendered into
243    /// `message` so `AnalyzeError` stays `Send + Sync` regardless of
244    /// the variant we received.
245    Pool {
246        /// The application DB URL we attempted to reach. Echoed in the
247        /// operator-facing message so the resolution path is visible.
248        url: String,
249        /// Underlying error message. The framework's
250        /// `DjogiError::Pool` enum carries trait-object inner values
251        /// that aren't always `Send + Sync`; rendering eagerly keeps
252        /// `AnalyzeError` cheap to clone in test assertions and
253        /// avoids leaking deadpool types to consumers.
254        message: String,
255    },
256    /// `pg_stat_user_tables` query (or the optional pg_partman join)
257    /// surfaced a Postgres error that is NOT one of the "extension
258    /// absent" SQLSTATEs we tolerate. Rendered eagerly for the same
259    /// reasons as `Pool::message`.
260    Db(String),
261    /// Writing the rendered output to stdout failed (broken pipe,
262    /// disk full on a redirected `> file.json`, etc.).
263    Io(std::io::Error),
264    /// `serde_json` encoding failed. Should be unreachable because the
265    /// `Row` projection only contains primitive / `OffsetDateTime` /
266    /// known-`Serialize` types, but we surface it as a structured
267    /// variant rather than `unwrap()` so a future schema extension
268    /// that introduces a non-serialisable field fails loudly.
269    Json(serde_json::Error),
270}
271
272impl std::fmt::Display for AnalyzeError {
273    /// Every variant ships an operator-actionable remediation hint after
274    /// the cause — `eprintln!("djogi analyze: {e}")` should be enough to
275    /// either fix the problem or know what to file. The shape is
276    /// "<cause>. <remediation verb>...". The
277    /// `analyze_error_display_is_operator_actionable` test pins each
278    /// variant's remediation keyword (`Verify` / `Check` / `file an
279    /// issue`) so a future drift trips the test rather than silently
280    /// losing the hint.
281    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
282        match self {
283            AnalyzeError::Config(message) => write!(
284                f,
285                "config load: {message}. \
286                 Verify the Djogi.toml workspace path and the [database] section.",
287            ),
288            AnalyzeError::Pool { url, message } => write!(
289                f,
290                "application DB at `{url}` unreachable: {message}. \
291                 Verify Djogi.toml::database.url is reachable and the \
292                 credentials grant CONNECT.",
293            ),
294            AnalyzeError::Db(message) => write!(
295                f,
296                "live-DB query: {message}. \
297                 Verify the app DB is reachable and the role has SELECT \
298                 privilege on pg_stat_user_tables (and on partman.* if \
299                 pg_partman is installed).",
300            ),
301            AnalyzeError::Io(e) => write!(
302                f,
303                "writing analyze output: {e}. \
304                 Check stdout/stderr permissions and the workspace path.",
305            ),
306            AnalyzeError::Json(e) => write!(
307                f,
308                "encoding analyze output as JSON: {e}. \
309                 This is an internal bug — please file an issue with the \
310                 input that triggered it.",
311            ),
312        }
313    }
314}
315
316impl std::error::Error for AnalyzeError {
317    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
318        match self {
319            AnalyzeError::Io(e) => Some(e),
320            AnalyzeError::Json(e) => Some(e),
321            AnalyzeError::Config(_) | AnalyzeError::Pool { .. } | AnalyzeError::Db(_) => None,
322        }
323    }
324}
325
326impl From<std::io::Error> for AnalyzeError {
327    fn from(e: std::io::Error) -> Self {
328        AnalyzeError::Io(e)
329    }
330}
331
332impl From<serde_json::Error> for AnalyzeError {
333    fn from(e: serde_json::Error) -> Self {
334        AnalyzeError::Json(e)
335    }
336}
337
338/// Output format selector — wired up to the CLI's `--format` flag.
339///
340/// `Human` is for direct operator reading; `Json` is for CI dashboards
341/// and other machine consumers. Both paths emit deterministic output
342/// (sorted by `table_name`, no `HashMap` iteration anywhere on the
343/// rendering path) so a `diff` between yesterday's and today's run
344/// shows only real schema-health changes.
345#[derive(Debug, Clone, Copy, PartialEq, Eq)]
346pub enum AnalyzeFormat {
347    /// One sorted ASCII line per table — `table | live | dead | parts | recommendation`.
348    Human,
349    /// Pretty-printed JSON array of `{table_name, recommendation, ...}`
350    /// rows, sorted by `table_name`. Pretty rather than compact so
351    /// `git diff` between dashboard runs is reviewable.
352    Json,
353}
354
355/// Pull live-DB stats from `pg_stat_user_tables` and (optionally)
356/// `pg_partman.show_partitions(...)` for every user table.
357///
358/// # Query design
359///
360/// The primary query lists schema-qualified table names plus visibility
361/// counters and last-analyze timestamps from `pg_stat_user_tables`.
362/// That catalogue is part of every Postgres install, so this query is
363/// always available and never errors with `UNDEFINED_TABLE`.
364///
365/// The per-row partition lookup uses `partman.show_partitions($1)`
366/// which is part of the optional `pg_partman` extension. Many adopters
367/// will not have it installed; rather than refusing to run analyze on
368/// those clusters, we catch SQLSTATE `UNDEFINED_FUNCTION` (`42883`),
369/// `UNDEFINED_TABLE` (`42P01`), and `INVALID_SCHEMA_NAME` (`3F000`)
370/// from the partman call and fall back to `partition_count = 0`. The
371/// fallback collapses the partman path to "no partitions reported"
372/// rather than producing a hard error — which is the right semantic
373/// because a table that pg_partman doesn't know about is, from
374/// analyze's perspective, indistinguishable from an unpartitioned
375/// table.
376///
377/// # Determinism
378///
379/// `ORDER BY table_name` in the primary query plus a defensive
380/// `Vec::sort_by` after collection (in [`run`]) means the output
381/// ordering does not depend on Postgres planner decisions.
382///
383/// # Read-only
384///
385/// Both queries are `SELECT`-only with positional binds. No DDL, no
386/// DML — analyze never writes.
387pub async fn fetch_table_health(pool: &DjogiPool) -> Result<Vec<TableHealth>, AnalyzeError> {
388    let mut ctx = DjogiContext::from_pool(pool.clone());
389
390    // Probe `partman` schema AND `show_partitions` function existence
391    // ONCE up-front rather than letting every per-table
392    // `partman.show_partitions(...)` call discover the absence at
393    // prepare time.
394    //
395    // # Why probe up-front
396    //
397    // `prepare_cached` (the path `raw_rows` routes through) maps
398    // tokio-postgres errors via `DbError::other(e.to_string())` —
399    // which DROPS the SQLSTATE because the prepare-error path
400    // collapses the `tokio_postgres::Error` into a message-only
401    // `DbError` whose `code()` returns `None`. The original
402    // `is_partman_absent_code` SQLSTATE classifier therefore never
403    // matches against a cluster without pg_partman, and every
404    // analyze run on such a cluster fails with `AnalyzeError::Db`
405    // even though the partman call should be a soft fallback.
406    //
407    // # Why probe BOTH schema and function
408    //
409    // A schema-only check (`pg_namespace.nspname = 'partman'`) is
410    // insufficient: a partial install (e.g. the extension was
411    // dropped but the schema was preserved by a CASCADE-less
412    // `DROP EXTENSION pg_partman`, or a mid-upgrade where the
413    // schema exists but the new function shape has not been
414    // installed yet) leaves the schema present without
415    // `show_partitions`. The per-table call would then `prepare`
416    // the function reference, fail with `UNDEFINED_FUNCTION`, and
417    // hit the same SQLSTATE-dropping path described above — so the
418    // soft fallback would never engage and analyze would fail.
419    //
420    // Joining `pg_proc` against `pg_namespace.oid = pg_proc.pronamespace`
421    // confirms the function we're about to call actually exists in
422    // the partman schema. The probe returns true only when BOTH the
423    // schema and `show_partitions` are present; any partial-install
424    // shape returns false and we skip the per-table partman calls
425    // entirely (`partition_count = 0` for every row — the same
426    // outcome the SQLSTATE-fallback path would have produced for an
427    // entirely-absent extension).
428    //
429    // Both `pg_namespace` and `pg_proc` are core system catalogues —
430    // the probe is always-prepareable, returns a clean
431    // `Result<bool, _>`, and costs ONE query per analyze invocation
432    // instead of N (one per user table).
433    //
434    // The retained `query_partition_count` SQLSTATE classifier is a
435    // belt-and-braces guard for execution-time partman failures: if
436    // `show_partitions` exists at probe time but errors at runtime
437    // (e.g. permission revoked between probe and call, or the
438    // function body itself raises `UNDEFINED_TABLE` because no
439    // partitioned parents are registered), we still soft-degrade
440    // to `partition_count = 0`. That path goes through `query` not
441    // `prepare`, so the SQLSTATE survives and the classifier still
442    // catches it.
443    let partman_present = !ctx
444        .raw_rows(
445            "SELECT 1 \
446             FROM pg_namespace n \
447             JOIN pg_proc p ON p.pronamespace = n.oid \
448             WHERE n.nspname = 'partman' AND p.proname = 'show_partitions'",
449            &[],
450        )
451        .await
452        .map_err(djogi_err_to_analyze)?
453        .is_empty();
454
455    // Primary query — `pg_stat_user_tables` is always available on
456    // every Postgres cluster djogi targets (>= 18). The schema name is
457    // joined into `table_name` so partitioned tables in non-public
458    // schemas show up unambiguously in the operator output.
459    let stats_sql = "SELECT \
460                     schemaname || '.' || relname AS table_name, \
461                     n_live_tup, \
462                     n_dead_tup, \
463                     last_analyze \
464                     FROM pg_stat_user_tables \
465                     ORDER BY schemaname, relname";
466    let stats_rows = ctx
467        .raw_rows(stats_sql, &[])
468        .await
469        .map_err(djogi_err_to_analyze)?;
470
471    // Per-row partition lookup. Errors of the "extension absent" class
472    // are swallowed into `partition_count = 0`; everything else
473    // propagates as `AnalyzeError::Db`. When the up-front probe says
474    // partman is absent, we skip the per-table query entirely and
475    // assign `partition_count = 0` directly — same outcome, no
476    // per-table round-trip.
477    let mut out = Vec::with_capacity(stats_rows.len());
478    for row in stats_rows {
479        // `try_get` (not `get`) per the `raw_rows` contract: the row
480        // accessor that returns `Result<T, _>` rather than panicking on
481        // a type/decode mismatch. A future Postgres upgrade or a
482        // restrictive role granting access to `pg_stat_user_tables` with
483        // unexpected nullability could otherwise crash the CLI; routing
484        // through `AnalyzeError::Db` keeps every failure path
485        // operator-actionable.
486        let table_name: String = row
487            .try_get(0)
488            .map_err(|e| AnalyzeError::Db(format!("decoding table_name: {e}")))?;
489        let n_live_tup: i64 = row
490            .try_get(1)
491            .map_err(|e| AnalyzeError::Db(format!("decoding n_live_tup: {e}")))?;
492        let n_dead_tup: i64 = row
493            .try_get(2)
494            .map_err(|e| AnalyzeError::Db(format!("decoding n_dead_tup: {e}")))?;
495        let last_analyze: Option<time::OffsetDateTime> = row
496            .try_get(3)
497            .map_err(|e| AnalyzeError::Db(format!("decoding last_analyze: {e}")))?;
498
499        let partition_count = if partman_present {
500            match query_partition_count(&mut ctx, &table_name).await {
501                Ok(count) => count,
502                Err(PartmanError::Absent) => 0,
503                Err(PartmanError::Other(message)) => return Err(AnalyzeError::Db(message)),
504            }
505        } else {
506            0
507        };
508
509        out.push(TableHealth {
510            table_name,
511            n_live_tup,
512            n_dead_tup,
513            last_analyze,
514            partition_count,
515        });
516    }
517
518    Ok(out)
519}
520
521/// Internal error type for [`query_partition_count`]. Distinct from
522/// [`AnalyzeError`] so the caller can match on `Absent` (treat as
523/// `partition_count = 0`) without conflating with the broader
524/// `AnalyzeError::Db` channel.
525enum PartmanError {
526    /// `pg_partman` is not installed on this cluster — the
527    /// `partman.show_partitions` function or `partman` schema is
528    /// undefined. Treated as "no partitions" by the caller.
529    Absent,
530    /// Anything else — connection error, syntax error, permission
531    /// denied. Rendered into `AnalyzeError::Db`.
532    Other(String),
533}
534
535/// Query the partition count for `table_name` via
536/// `partman.show_partitions($1)`.
537///
538/// **Parameter binding.** `$1` carries `table_name`; we never
539/// `format!()`-interpolate. The table name comes from
540/// `pg_stat_user_tables` (a system catalogue, so trusted), but the
541/// codebase rule is "always parameterise" — there is no scenario in
542/// which the cost of a parameter bind matters, and a stray code path
543/// that builds the table name from an untrusted source later cannot
544/// regress this query into an injection vector.
545///
546/// Returns `Err(PartmanError::Absent)` for the three SQLSTATEs that
547/// indicate "pg_partman not installed":
548///
549/// - `42883` `UNDEFINED_FUNCTION` — `partman` schema present but
550///   `show_partitions` not (e.g. partial install).
551/// - `42P01` `UNDEFINED_TABLE` — `show_partitions` resolves but the
552///   underlying `part_config` lookup fails because no partitioned
553///   parents are registered.
554/// - `3F000` `INVALID_SCHEMA_NAME` — `partman` schema entirely
555///   absent.
556async fn query_partition_count(
557    ctx: &mut DjogiContext,
558    table_name: &str,
559) -> Result<i32, PartmanError> {
560    let sql = "SELECT count(*)::int FROM partman.show_partitions($1)";
561    match ctx.raw_rows(sql, &[&table_name]).await {
562        Ok(rows) => {
563            if let Some(row) = rows.first() {
564                // `try_get` (not `get`) per the `raw_rows` contract — a
565                // future change to `partman.show_partitions` return shape
566                // would otherwise panic the CLI instead of surfacing as
567                // a structured `Other` variant the caller routes into
568                // `AnalyzeError::Db`.
569                let count: i32 = row
570                    .try_get(0)
571                    .map_err(|e| PartmanError::Other(format!("decoding partition count: {e}")))?;
572                Ok(count)
573            } else {
574                // `count(*)` always returns one row, but defend against
575                // a future Postgres upgrade that could change the
576                // contract — reporting "0 partitions" is the right
577                // fallback if the row is somehow missing.
578                Ok(0)
579            }
580        }
581        Err(DjogiError::Db(db)) => {
582            if let Some(code) = db.code()
583                && is_partman_absent_code(code)
584            {
585                Err(PartmanError::Absent)
586            } else {
587                Err(PartmanError::Other(db.to_string()))
588            }
589        }
590        Err(other) => Err(PartmanError::Other(other.to_string())),
591    }
592}
593
594/// Map the three SQLSTATE classes that signal "pg_partman not
595/// installed" onto a single boolean. Centralised so both
596/// [`query_partition_count`] and any future caller reuse the same
597/// classification.
598fn is_partman_absent_code(code: &SqlState) -> bool {
599    *code == SqlState::UNDEFINED_FUNCTION
600        || *code == SqlState::UNDEFINED_TABLE
601        || *code == SqlState::INVALID_SCHEMA_NAME
602}
603
604/// Convert a `DjogiError` from the live-DB path into an
605/// `AnalyzeError::Db`. Rendered eagerly (`to_string()`) so the
606/// `AnalyzeError` does not need to carry the heterogeneous inner type.
607fn djogi_err_to_analyze(e: DjogiError) -> AnalyzeError {
608    AnalyzeError::Db(e.to_string())
609}
610
611/// `djogi analyze` entry point — consumed by `main.rs::TopCommand::Analyze`.
612///
613/// Orchestrates the live-DB pull, the pure recommendation pass, and
614/// the rendering. Splitting fetch / recommend / render this way means
615/// every test (unit, integration, regression) targets exactly the
616/// layer that interests it without dragging in the others.
617///
618/// # Workspace + config resolution
619///
620/// `workspace` is `None` by default — we resolve to
621/// `std::env::current_dir()` and then load `Djogi.toml` via
622/// `DjogiConfig::load_from_workspace`. Mirrors `verify::run`'s pattern.
623///
624/// # Pool lifecycle
625///
626/// One pool, one context, every per-table query runs through it.
627/// Built fresh on every invocation — analyze is a one-shot CLI command,
628/// not a long-lived process, so pool reuse across invocations is not a
629/// goal.
630///
631/// # Output destination
632///
633/// Both rendering paths write to a locked `stdout`. Locking once at
634/// the top means we don't pay the `Stdout::lock()` cost per row, and
635/// the renderers themselves take a generic `&mut W: Write` so the
636/// pure render-only tests (T10.2's `render_human_*` / `render_json_*`)
637/// can target a `Vec<u8>` without going through stdout.
638pub async fn run(
639    workspace: Option<std::path::PathBuf>,
640    format: AnalyzeFormat,
641    threshold_vacuum: f64,
642    threshold_partition_rows: i64,
643) -> Result<(), AnalyzeError> {
644    // Step 1 — resolve workspace, load config.
645    let workspace = workspace.unwrap_or_else(|| {
646        std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."))
647    });
648    let config = DjogiConfig::load_from_workspace(&workspace)
649        .map_err(|e| AnalyzeError::Config(e.to_string()))?;
650
651    // Step 2 — connect to the application DB.
652    let url = config.database.url.clone();
653    let pool = DjogiPool::connect(&url)
654        .await
655        .map_err(|e| AnalyzeError::Pool {
656            url: url.clone(),
657            message: e.to_string(),
658        })?;
659    djogi::pg::preflight::check_postgres_version(&pool)
660        .await
661        .map_err(|e| AnalyzeError::Pool {
662            url: url.clone(),
663            message: format!("support boundary: {e}"),
664        })?;
665
666    // Step 3 — fetch + recommend.
667    let mut health = fetch_table_health(&pool).await?;
668    // Defence-in-depth — the SQL `ORDER BY` already sorts the rows,
669    // but we re-sort by the materialised `table_name` so the
670    // determinism contract is independent of the planner.
671    health.sort_by(|a, b| a.table_name.cmp(&b.table_name));
672
673    let report: Vec<(TableHealth, Recommendation)> = health
674        .into_iter()
675        .map(|h| {
676            let rec = recommend(&h, threshold_vacuum, threshold_partition_rows);
677            (h, rec)
678        })
679        .collect();
680
681    // Step 4 — render. Lock stdout once so the renderers don't pay the
682    // per-row lock cost and a downstream `tee` / pipe sees one
683    // contiguous stream.
684    let stdout = std::io::stdout();
685    let mut handle = stdout.lock();
686    match format {
687        AnalyzeFormat::Human => render_human(&report, &mut handle)?,
688        AnalyzeFormat::Json => render_json(&report, &mut handle)?,
689    }
690    handle.flush()?;
691    Ok(())
692}
693
694/// Render the human-readable ASCII table.
695///
696/// One header line plus one body line per table. Columns are padded so
697/// the visual alignment is stable across runs; the recommendation
698/// column carries a short tag (`vacuum`, `partition`, `parts++`,
699/// `healthy`) plus the structural detail. Empty input prints the
700/// header only — no "no rows" placeholder, so a downstream `wc -l`
701/// sees the row count directly.
702///
703/// # Determinism
704///
705/// The input is already sorted by `table_name` (see [`run`]); this
706/// renderer iterates in the input order. No `HashMap`, no
707/// `BTreeMap` — pure `Vec` iteration.
708fn render_human<W: Write>(
709    report: &[(TableHealth, Recommendation)],
710    out: &mut W,
711) -> Result<(), AnalyzeError> {
712    // Header — clippy's `write_literal` lint flags trailing
713    // `"<lit>"` arguments paired with `{}` placeholders, so we keep
714    // the format string fully width-specified and stage the header
715    // labels through bindings rather than literal slots.
716    let h_table = "TABLE";
717    let h_live = "LIVE";
718    let h_dead = "DEAD";
719    let h_parts = "PARTITIONS";
720    let h_rec = "RECOMMENDATION";
721    writeln!(
722        out,
723        "{h_table:<48} {h_live:>14} {h_dead:>14} {h_parts:>10}  {h_rec}",
724    )?;
725    for (h, r) in report {
726        writeln!(
727            out,
728            "{:<48} {:>14} {:>14} {:>10}  {}",
729            h.table_name,
730            h.n_live_tup,
731            h.n_dead_tup,
732            h.partition_count,
733            recommendation_human(r),
734        )?;
735    }
736    Ok(())
737}
738
739/// Render a single recommendation as a one-line ASCII tag for the
740/// human renderer. Stable text format — string-substring assertions on
741/// the operator output are part of the contract.
742fn recommendation_human(r: &Recommendation) -> String {
743    match r {
744        Recommendation::VacuumNeeded { dead_tup_ratio } => {
745            format!("vacuum (dead_tup_ratio={dead_tup_ratio:.4})")
746        }
747        Recommendation::PartitionRecommended { reason } => {
748            format!("partition ({reason})")
749        }
750        Recommendation::PartitionCountIncrease { current, suggested } => {
751            format!("parts++ (current={current}, suggested={suggested})")
752        }
753        Recommendation::Healthy => "healthy".to_string(),
754    }
755}
756
757/// Render the JSON array.
758///
759/// # Determinism contract (v3 §494, T10.1 review)
760///
761/// We project through a named `Row` struct rather than a
762/// `HashMap<String, _>` — `serde`'s default behaviour preserves struct
763/// field declaration order, so the JSON output is byte-stable across
764/// runs. `serde_json::to_writer_pretty` is chosen over the compact
765/// form so dashboards can `git diff` two runs without reformatting
766/// first.
767///
768/// # Field order
769///
770/// `table_name` first (sort key), then the raw counters
771/// (`n_live_tup`, `n_dead_tup`), then `last_analyze`,
772/// `partition_count`, and finally the structured `recommendation`.
773/// Pinned by `render_json_field_order_is_stable`.
774fn render_json<W: Write>(
775    report: &[(TableHealth, Recommendation)],
776    out: &mut W,
777) -> Result<(), AnalyzeError> {
778    /// Wire-format projection of one analyzed table.
779    ///
780    /// Field declaration order IS the JSON field order — see the
781    /// renderer's determinism contract above. Borrows from the
782    /// in-memory `report` so the projection is allocation-free.
783    #[derive(Serialize)]
784    struct Row<'a> {
785        table_name: &'a str,
786        n_live_tup: i64,
787        n_dead_tup: i64,
788        last_analyze: Option<time::OffsetDateTime>,
789        partition_count: i32,
790        recommendation: &'a Recommendation,
791    }
792
793    let rows: Vec<Row<'_>> = report
794        .iter()
795        .map(|(h, r)| Row {
796            table_name: &h.table_name,
797            n_live_tup: h.n_live_tup,
798            n_dead_tup: h.n_dead_tup,
799            last_analyze: h.last_analyze,
800            partition_count: h.partition_count,
801            recommendation: r,
802        })
803        .collect();
804
805    serde_json::to_writer_pretty(&mut *out, &rows)?;
806    // serde_json::to_writer_pretty does NOT emit a trailing newline;
807    // adding one keeps the output well-behaved for shell pipelines
808    // (e.g. `djogi analyze --format json | jq .`).
809    writeln!(out)?;
810    Ok(())
811}
812
813#[cfg(test)]
814mod tests {
815    //! Pure unit tests covering every arm of `Recommendation` plus
816    //! precedence ordering and determinism. None of these tests touch
817    //! the network, the filesystem, or a database — they construct
818    //! `TableHealth` values directly and assert on the returned
819    //! `Recommendation`.
820
821    use super::*;
822
823    /// Helper: build a `TableHealth` with sensible defaults so tests
824    /// only override the fields they care about. Centralising the
825    /// builder keeps test bodies focused on the rule being exercised.
826    fn health(n_live_tup: i64, n_dead_tup: i64, partition_count: i32) -> TableHealth {
827        TableHealth {
828            table_name: "test_table".to_string(),
829            n_live_tup,
830            n_dead_tup,
831            last_analyze: None,
832            partition_count,
833        }
834    }
835
836    #[test]
837    fn recommend_healthy_when_below_all_thresholds() {
838        // Small table, no dead tuples, plenty of partition headroom.
839        let h = health(1_000, 0, 0);
840        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
841
842        // Same idea but partitioned and well below the per-partition
843        // ceiling — also Healthy.
844        let h = health(100_000, 0, 4);
845        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
846
847        // Genuinely empty table — Healthy (vacuum guard skips the
848        // division, partition checks don't fire below threshold).
849        let h = health(0, 0, 0);
850        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
851    }
852
853    #[test]
854    fn recommend_vacuum_when_dead_tup_ratio_high() {
855        // Just above 0.2 — fires.
856        // 21 dead / (79 live + 21 dead) = 0.21
857        let h = health(79, 21, 0);
858        match recommend(&h, 0.2, 10_000_000) {
859            Recommendation::VacuumNeeded { dead_tup_ratio } => {
860                assert!(
861                    dead_tup_ratio > 0.20 && dead_tup_ratio < 0.22,
862                    "expected ratio near 0.21, got {dead_tup_ratio}"
863                );
864            }
865            other => panic!("expected VacuumNeeded, got {other:?}"),
866        }
867
868        // Just below 0.2 — does NOT fire.
869        // 19 dead / 100 = 0.19
870        let h = health(81, 19, 0);
871        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
872
873        // Exactly at 0.2 — does NOT fire (strict greater-than).
874        let h = health(80, 20, 0);
875        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
876
877        // High ratio (50%) — fires unambiguously.
878        let h = health(50, 50, 0);
879        match recommend(&h, 0.2, 10_000_000) {
880            Recommendation::VacuumNeeded { dead_tup_ratio } => {
881                assert!((dead_tup_ratio - 0.5).abs() < 1e-9);
882            }
883            other => panic!("expected VacuumNeeded, got {other:?}"),
884        }
885    }
886
887    #[test]
888    fn recommend_partition_when_unpartitioned_and_large() {
889        // Just above 10M rows, no dead tuples, no partitions.
890        let h = health(10_000_001, 0, 0);
891        match recommend(&h, 0.2, 10_000_000) {
892            Recommendation::PartitionRecommended { reason } => {
893                assert!(reason.contains("10000001"), "reason: {reason}");
894                assert!(reason.contains("not partitioned"), "reason: {reason}");
895                assert!(reason.contains("threshold: 10000000"), "reason: {reason}");
896            }
897            other => panic!("expected PartitionRecommended, got {other:?}"),
898        }
899
900        // Exactly at threshold — does NOT fire (strict greater-than).
901        let h = health(10_000_000, 0, 0);
902        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
903
904        // Way over threshold but already partitioned — does NOT fire
905        // the unpartitioned rule (CountIncrease may, see below).
906        let h = health(20_000_000, 0, 100);
907        // 20M / 100 = 200k average → below 10M threshold → Healthy.
908        assert_eq!(recommend(&h, 0.2, 10_000_000), Recommendation::Healthy);
909    }
910
911    #[test]
912    fn recommend_partition_count_increase_when_partitions_undersized() {
913        // 100M rows across 4 partitions = 25M each → exceeds 10M.
914        let h = health(100_000_000, 0, 4);
915        assert_eq!(
916            recommend(&h, 0.2, 10_000_000),
917            Recommendation::PartitionCountIncrease {
918                current: 4,
919                suggested: 8,
920            }
921        );
922
923        // Saturating-mul guard: even pathological partition counts
924        // produce a valid `i32` suggestion.
925        let h = health(i64::MAX / 2, 0, i32::MAX);
926        match recommend(&h, 0.2, 10_000_000) {
927            Recommendation::PartitionCountIncrease { current, suggested } => {
928                assert_eq!(current, i32::MAX);
929                assert_eq!(suggested, i32::MAX); // saturated
930            }
931            other => panic!("expected PartitionCountIncrease, got {other:?}"),
932        }
933    }
934
935    #[test]
936    fn recommend_is_deterministic() {
937        // Build a single TableHealth and run recommend() 100 times;
938        // every result must equal the first. Covers the v3 §494
939        // concern about HashMap-iteration nondeterminism — there is no
940        // HashMap in `recommend`, but the test cements the contract so
941        // future refactors don't sneak one in.
942        let h = health(50_000_000, 0, 3);
943        let baseline = recommend(&h, 0.2, 10_000_000);
944
945        for i in 0..100 {
946            let result = recommend(&h, 0.2, 10_000_000);
947            assert_eq!(
948                result, baseline,
949                "iteration {i} diverged from baseline {baseline:?}"
950            );
951        }
952
953        // Same shape with the VacuumNeeded arm — float math should
954        // also be bit-stable across repeated invocations on the same
955        // inputs.
956        let h = health(70, 30, 0);
957        let baseline = recommend(&h, 0.2, 10_000_000);
958        for i in 0..100 {
959            assert_eq!(
960                recommend(&h, 0.2, 10_000_000),
961                baseline,
962                "vacuum iteration {i} diverged"
963            );
964        }
965    }
966
967    #[test]
968    fn recommend_vacuum_dominates_partition() {
969        // Both VacuumNeeded AND PartitionRecommended would fire in
970        // isolation — vacuum wins per precedence ordering.
971        //
972        // Setup: 100M live + 50M dead → 33% dead ratio AND
973        // unpartitioned over the 10M row threshold.
974        let h = health(100_000_000, 50_000_000, 0);
975        match recommend(&h, 0.2, 10_000_000) {
976            Recommendation::VacuumNeeded { dead_tup_ratio } => {
977                assert!((dead_tup_ratio - (50.0 / 150.0)).abs() < 1e-9);
978            }
979            other => panic!("expected VacuumNeeded (precedence), got {other:?}"),
980        }
981    }
982
983    #[test]
984    fn recommend_partition_dominates_count_increase() {
985        // An unpartitioned table cannot trigger CountIncrease at all
986        // (CountIncrease requires `partition_count > 0`), so this test
987        // pins the precedence boundary the other way: a partitioned
988        // table that crosses both the size *and* the per-partition
989        // ceiling falls into CountIncrease — there is no way for
990        // PartitionRecommended to fire on a partitioned table by
991        // construction.
992        //
993        // Rule still under test: when both partition rules *could*
994        // logically apply, PartitionRecommended only matches the
995        // unpartitioned case (`partition_count == 0`).
996        let h = health(100_000_000, 0, 4); // partitioned, 25M/partition
997        match recommend(&h, 0.2, 10_000_000) {
998            Recommendation::PartitionCountIncrease { current, suggested } => {
999                assert_eq!(current, 4);
1000                assert_eq!(suggested, 8);
1001            }
1002            other => panic!("expected PartitionCountIncrease, got {other:?}"),
1003        }
1004
1005        // And the inverse: unpartitioned + over threshold goes to
1006        // PartitionRecommended, NOT CountIncrease.
1007        let h = health(100_000_000, 0, 0);
1008        assert!(matches!(
1009            recommend(&h, 0.2, 10_000_000),
1010            Recommendation::PartitionRecommended { .. }
1011        ));
1012    }
1013
1014    #[test]
1015    fn recommend_handles_n_tup_addition_overflow() {
1016        // Both counters at `i64::MAX` would panic in debug or silently
1017        // wrap in release under unchecked addition. `saturating_add`
1018        // caps at `i64::MAX`, so the ratio is `i64::MAX / i64::MAX = 1.0`
1019        // — well above the default 0.2 threshold, so VacuumNeeded fires.
1020        // The test pins the contract: pathological stats values must NOT
1021        // panic and must still produce a deterministic recommendation.
1022        let h = TableHealth {
1023            table_name: "boom".to_string(),
1024            n_live_tup: i64::MAX,
1025            n_dead_tup: i64::MAX,
1026            last_analyze: None,
1027            partition_count: 0,
1028        };
1029        let result = recommend(&h, 0.2, 10_000_000);
1030        match result {
1031            Recommendation::VacuumNeeded { dead_tup_ratio } => {
1032                // i64::MAX / i64::MAX (saturated) = 1.0
1033                assert!(
1034                    (dead_tup_ratio - 1.0).abs() < 1e-9,
1035                    "expected ratio 1.0, got {dead_tup_ratio}"
1036                );
1037            }
1038            other => panic!("expected VacuumNeeded, got {other:?}"),
1039        }
1040    }
1041
1042    /// Helper for the render-only tests: construct a small report with
1043    /// every recommendation arm represented. Sorted by table_name so
1044    /// the renderer's input mirrors what `run` would produce.
1045    fn fixture_report() -> Vec<(TableHealth, Recommendation)> {
1046        vec![
1047            (health(1_000, 0, 0), Recommendation::Healthy),
1048            (
1049                TableHealth {
1050                    table_name: "public.events".to_string(),
1051                    n_live_tup: 100_000_000,
1052                    n_dead_tup: 0,
1053                    last_analyze: None,
1054                    partition_count: 0,
1055                },
1056                Recommendation::PartitionRecommended {
1057                    reason:
1058                        "table has 100000000 live rows but is not partitioned (threshold: 10000000)"
1059                            .to_string(),
1060                },
1061            ),
1062            (
1063                TableHealth {
1064                    table_name: "public.orders".to_string(),
1065                    n_live_tup: 100_000_000,
1066                    n_dead_tup: 0,
1067                    last_analyze: None,
1068                    partition_count: 4,
1069                },
1070                Recommendation::PartitionCountIncrease {
1071                    current: 4,
1072                    suggested: 8,
1073                },
1074            ),
1075            (
1076                TableHealth {
1077                    table_name: "public.users".to_string(),
1078                    n_live_tup: 50,
1079                    n_dead_tup: 50,
1080                    last_analyze: None,
1081                    partition_count: 0,
1082                },
1083                Recommendation::VacuumNeeded {
1084                    dead_tup_ratio: 0.5,
1085                },
1086            ),
1087        ]
1088    }
1089
1090    /// Render-only test: human format produces a sorted ASCII table
1091    /// whose body lines appear in input order. Pinning the format
1092    /// stops a future "improve the visuals" refactor from breaking
1093    /// downstream operator scripts that grep for substrings.
1094    #[test]
1095    fn render_human_lists_every_table_in_input_order() {
1096        let report = fixture_report();
1097        let mut buf = Vec::new();
1098        render_human(&report, &mut buf).expect("render");
1099        let s = String::from_utf8(buf).expect("utf8");
1100
1101        // Header is present.
1102        assert!(s.contains("TABLE"), "missing header: {s}");
1103        assert!(s.contains("RECOMMENDATION"), "missing header: {s}");
1104
1105        // Each fixture table appears exactly once in the body.
1106        for table in [
1107            "test_table",
1108            "public.events",
1109            "public.orders",
1110            "public.users",
1111        ] {
1112            assert_eq!(
1113                s.matches(table).count(),
1114                1,
1115                "table `{table}` should appear once: {s}"
1116            );
1117        }
1118
1119        // Every recommendation tag shows up with its expected substring.
1120        assert!(s.contains("healthy"), "healthy tag missing: {s}");
1121        assert!(
1122            s.contains("vacuum (dead_tup_ratio="),
1123            "vacuum tag missing: {s}"
1124        );
1125        assert!(
1126            s.contains("partition (table has 100000000"),
1127            "partition tag missing: {s}"
1128        );
1129        assert!(
1130            s.contains("parts++ (current=4, suggested=8)"),
1131            "parts++ tag missing: {s}"
1132        );
1133
1134        // Body line order matches input order.
1135        let test_idx = s.find("test_table").unwrap();
1136        let events_idx = s.find("public.events").unwrap();
1137        let orders_idx = s.find("public.orders").unwrap();
1138        let users_idx = s.find("public.users").unwrap();
1139        assert!(
1140            test_idx < events_idx && events_idx < orders_idx && orders_idx < users_idx,
1141            "input order not preserved: {s}"
1142        );
1143    }
1144
1145    /// Regression sentinel — same input must produce byte-identical
1146    /// output across repeated invocations. Cements the determinism
1147    /// contract that `run` orchestrates: sort then render.
1148    #[test]
1149    fn render_human_is_byte_stable() {
1150        let report = fixture_report();
1151        let mut a = Vec::new();
1152        let mut b = Vec::new();
1153        render_human(&report, &mut a).unwrap();
1154        render_human(&report, &mut b).unwrap();
1155        assert_eq!(a, b, "render_human is not byte-stable");
1156    }
1157
1158    /// Render-only test: JSON format parses through `serde_json::from_slice`
1159    /// as a non-empty array of well-formed objects, with every recommendation
1160    /// arm represented and the field order matching the projection struct.
1161    #[test]
1162    fn render_json_parses_and_field_order_is_stable() {
1163        let report = fixture_report();
1164        let mut buf = Vec::new();
1165        render_json(&report, &mut buf).expect("render");
1166
1167        // 1. Parses through serde_json::Value (smoke test).
1168        let parsed: serde_json::Value =
1169            serde_json::from_slice(&buf).expect("output must be valid JSON");
1170        let array = parsed.as_array().expect("output must be a JSON array");
1171        assert_eq!(array.len(), 4, "expected 4 fixture rows: {parsed}");
1172
1173        // 2. Every recommendation kind shows up at least once.
1174        let kinds: Vec<&str> = array
1175            .iter()
1176            .filter_map(|row| row.get("recommendation"))
1177            .filter_map(|rec| rec.get("kind"))
1178            .filter_map(|k| k.as_str())
1179            .collect();
1180        assert!(kinds.contains(&"healthy"), "kinds: {kinds:?}");
1181        assert!(kinds.contains(&"vacuum_needed"), "kinds: {kinds:?}");
1182        assert!(kinds.contains(&"partition_recommended"), "kinds: {kinds:?}");
1183        assert!(
1184            kinds.contains(&"partition_count_increase"),
1185            "kinds: {kinds:?}"
1186        );
1187
1188        // 3. Field order on the first row matches the Row struct.
1189        // serde_json with `preserve_order` enabled (workspace dep
1190        // feature) preserves insertion order on Map deserialisation;
1191        // we walk the keys and assert the declared order.
1192        let first = array.first().expect("non-empty");
1193        let obj = first.as_object().expect("row must be an object");
1194        let keys: Vec<&str> = obj.keys().map(String::as_str).collect();
1195        assert_eq!(
1196            keys,
1197            vec![
1198                "table_name",
1199                "n_live_tup",
1200                "n_dead_tup",
1201                "last_analyze",
1202                "partition_count",
1203                "recommendation",
1204            ],
1205            "field order must match Row struct declaration: {keys:?}"
1206        );
1207
1208        // 4. Body is byte-stable across two renders.
1209        let mut buf2 = Vec::new();
1210        render_json(&report, &mut buf2).unwrap();
1211        assert_eq!(buf, buf2, "render_json is not byte-stable");
1212    }
1213
1214    /// Empty input produces an empty JSON array — no crash, no
1215    /// "no rows" placeholder, just `[]\n` so downstream JSON parsers
1216    /// see a valid empty list.
1217    #[test]
1218    fn render_json_handles_empty_input() {
1219        let report: Vec<(TableHealth, Recommendation)> = Vec::new();
1220        let mut buf = Vec::new();
1221        render_json(&report, &mut buf).expect("render");
1222        let s = String::from_utf8(buf).expect("utf8");
1223        // `to_writer_pretty` emits `[]` for an empty Vec; the renderer
1224        // appends a newline.
1225        assert_eq!(s, "[]\n", "empty input should produce `[]\\n`, got: {s:?}");
1226    }
1227
1228    /// Empty input produces just the header line in the human renderer.
1229    /// Pins the no-rows behaviour so a downstream `wc -l` sees the row
1230    /// count directly (header + 0 body lines = 1).
1231    #[test]
1232    fn render_human_handles_empty_input() {
1233        let report: Vec<(TableHealth, Recommendation)> = Vec::new();
1234        let mut buf = Vec::new();
1235        render_human(&report, &mut buf).expect("render");
1236        let s = String::from_utf8(buf).expect("utf8");
1237        // Exactly one line — the header.
1238        assert_eq!(
1239            s.lines().count(),
1240            1,
1241            "expected just a header line, got: {s:?}"
1242        );
1243        assert!(s.contains("TABLE"));
1244    }
1245
1246    /// SQLSTATE classifier covers the three "pg_partman absent"
1247    /// codes and rejects anything else. Pure unit test — no DB.
1248    #[test]
1249    fn is_partman_absent_code_recognises_three_states() {
1250        assert!(is_partman_absent_code(&SqlState::UNDEFINED_FUNCTION));
1251        assert!(is_partman_absent_code(&SqlState::UNDEFINED_TABLE));
1252        assert!(is_partman_absent_code(&SqlState::INVALID_SCHEMA_NAME));
1253        // Non-absence codes must NOT count as partman-absent.
1254        assert!(!is_partman_absent_code(&SqlState::SYNTAX_ERROR));
1255        assert!(!is_partman_absent_code(&SqlState::INSUFFICIENT_PRIVILEGE));
1256        assert!(!is_partman_absent_code(&SqlState::CONNECTION_FAILURE));
1257    }
1258
1259    /// `AnalyzeError::Display` must surface operator-actionable text
1260    /// for every variant — the CLI prints these straight to stderr.
1261    /// Every arm carries a remediation hint with a verb keyword
1262    /// (`Verify` / `Check` / `file an issue`). The keyword assertion is
1263    /// the regression sentinel: a future drift that drops the hint
1264    /// trips the test rather than silently regressing operator UX.
1265    /// All five variants are covered (Config, Pool, Db, Io, Json).
1266    #[test]
1267    fn analyze_error_display_is_operator_actionable() {
1268        // Config — surfaces the cause AND a remediation pointer to the
1269        // workspace + [database] section.
1270        let cfg = AnalyzeError::Config("bad toml".to_string());
1271        let s = format!("{cfg}");
1272        assert!(s.contains("config load"), "config display: {s}");
1273        assert!(s.contains("bad toml"), "config display: {s}");
1274        assert!(
1275            s.contains("Verify"),
1276            "config display must carry a remediation keyword: {s}"
1277        );
1278        assert!(
1279            s.contains("Djogi.toml"),
1280            "config display must point at Djogi.toml: {s}"
1281        );
1282
1283        // Pool — surfaces the URL, the cause, AND a remediation pointer
1284        // to Djogi.toml::database.url.
1285        let pool = AnalyzeError::Pool {
1286            url: "postgres://localhost/x".to_string(),
1287            message: "refused".to_string(),
1288        };
1289        let s = format!("{pool}");
1290        assert!(s.contains("postgres://localhost/x"), "pool display: {s}");
1291        assert!(s.contains("refused"), "pool display: {s}");
1292        assert!(
1293            s.contains("Djogi.toml::database.url"),
1294            "pool display must point at config: {s}"
1295        );
1296        assert!(
1297            s.contains("Verify"),
1298            "pool display must carry a remediation keyword: {s}"
1299        );
1300
1301        // Db — surfaces the cause AND a remediation pointer to
1302        // privileges + reachability.
1303        let db = AnalyzeError::Db("relation \"foo\" does not exist".to_string());
1304        let s = format!("{db}");
1305        assert!(s.contains("live-DB query"), "db display: {s}");
1306        assert!(s.contains("relation \"foo\""), "db display: {s}");
1307        assert!(
1308            s.contains("Verify"),
1309            "db display must carry a remediation keyword: {s}"
1310        );
1311        assert!(
1312            s.contains("pg_stat_user_tables"),
1313            "db display must mention the catalogue we query: {s}"
1314        );
1315
1316        // Io — surfaces the cause AND a remediation pointer to
1317        // stdout/stderr permissions.
1318        let io = AnalyzeError::Io(std::io::Error::other("broken pipe"));
1319        let s = format!("{io}");
1320        assert!(s.contains("writing analyze output"), "io display: {s}");
1321        assert!(s.contains("broken pipe"), "io display: {s}");
1322        assert!(
1323            s.contains("Check"),
1324            "io display must carry a remediation keyword: {s}"
1325        );
1326
1327        // Json — was previously omitted; covered now. Surfaces the cause
1328        // AND a "file an issue" hint because reaching this arm implies
1329        // an internal serde bug rather than operator misconfiguration.
1330        let json_err: serde_json::Error =
1331            serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
1332        let json = AnalyzeError::Json(json_err);
1333        let s = format!("{json}");
1334        assert!(s.contains("encoding analyze output"), "json display: {s}");
1335        assert!(
1336            s.contains("file an issue"),
1337            "json display must point at the issue tracker: {s}"
1338        );
1339    }
1340}