rivet/preflight/
mod.rs

1mod analysis;
2mod cdc_health;
3pub(crate) mod cursor_expr;
4mod doctor;
5mod mongo;
6mod mssql;
7mod mysql;
8mod postgres;
9mod schema_error;
10pub mod type_report;
11
12pub(crate) use analysis::chunk_sparsity_from_counts;
13// Re-exported so the plan layer's strategy explainer can ground its "≥ threshold"
14// narrative on the same constant `check`/`init` use, not a hard-coded copy.
15pub(crate) use analysis::SMALL_TABLE_ROW_THRESHOLD;
16#[cfg(test)]
17use analysis::{
18    build_suggestion, check_connection_limit, check_dense_surrogate_cost,
19    check_parallel_memory_risk, check_sparse_range, compute_verdict, derive_strategy,
20    recommend_parallelism, recommend_profile,
21};
22#[allow(unused_imports)]
23pub use doctor::doctor;
24// Reused at the run-time connect seam (src/pipeline/single.rs) so a failed
25// `rivet run` carries the same category + remediation hint `rivet doctor` gives.
26pub(crate) use doctor::{categorize_source_error, source_error_hint};
27#[cfg(test)]
28use postgres::{extract_scan_type, parse_pg_row_estimate};
29
30use serde::Serialize;
31
32use crate::config::{Config, ExportConfig, SourceType};
33use crate::error::Result;
34use crate::types::policy::TypePolicy;
35use crate::types::target::{ExportTarget, TargetStatus};
36
37/// Serializes lowercase ("efficient"/"acceptable"/"degraded"/"unsafe") so
38/// `rivet check --json` consumers (CI gates, orchestrators) match on a stable,
39/// case-insensitive token rather than the SHOUTING `Display` form used in the
40/// human-readable table.
41#[derive(Debug, Serialize)]
42#[serde(rename_all = "lowercase")]
43pub enum HealthVerdict {
44    Efficient,
45    Acceptable,
46    Degraded,
47    Unsafe,
48}
49
50impl std::fmt::Display for HealthVerdict {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        match self {
53            Self::Efficient => write!(f, "EFFICIENT"),
54            Self::Acceptable => write!(f, "ACCEPTABLE"),
55            Self::Degraded => write!(f, "DEGRADED"),
56            Self::Unsafe => write!(f, "UNSAFE"),
57        }
58    }
59}
60
61pub(crate) struct ExportDiagnostic {
62    pub export_name: String,
63    pub strategy: String,
64    pub mode: String,
65    pub cursor_column: Option<String>,
66    pub row_estimate: Option<i64>,
67    /// Average bytes per row from catalog/plan stats (PG EXPLAIN `width`,
68    /// MSSQL `dm_db_partition_stats` pages/row). `None` when unavailable
69    /// (e.g. MySQL, with no trustworthy scan-free estimate). Feeds the
70    /// oversized-chunk warning and is shown as the `Row width` line.
71    pub avg_row_bytes: Option<i64>,
72    pub cursor_min: Option<String>,
73    pub cursor_max: Option<String>,
74    pub scan_type: Option<String>,
75    pub uses_index: bool,
76    pub verdict: HealthVerdict,
77    pub recommended_profile: &'static str,
78    pub recommended_parallel: (u32, &'static str),
79    pub warnings: Vec<analysis::Warning>,
80    pub suggestion: Option<String>,
81}
82
83// Hand-rolled `Serialize` (rather than `#[derive]`) so the JSON shape stays
84// fully under our control without touching the three engine construction sites:
85//   - `recommended_parallel` (a raw `(u32, &str)`) becomes a self-describing
86//     `{ "level": N, "reason": "…" }` object instead of a positional 2-array;
87//   - a derived `capabilities` object ({uses_index, has_cursor, can_parallel})
88//     is computed from the sibling fields at serialization time — no stored
89//     field, no extra probe;
90//   - `None` optionals are skipped to keep the object lean for CI consumers.
91// `HealthVerdict` rides its own `#[derive(Serialize)]` (lowercase tokens).
92impl Serialize for ExportDiagnostic {
93    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
94    where
95        S: serde::Serializer,
96    {
97        use serde::ser::SerializeMap;
98
99        #[derive(Serialize)]
100        struct RecommendedParallel {
101            level: u32,
102            reason: &'static str,
103        }
104        #[derive(Serialize)]
105        struct Capabilities {
106            uses_index: bool,
107            has_cursor: bool,
108            can_parallel: bool,
109        }
110
111        let mut map = serializer.serialize_map(None)?;
112        map.serialize_entry("export_name", &self.export_name)?;
113        map.serialize_entry("strategy", &self.strategy)?;
114        map.serialize_entry("mode", &self.mode)?;
115        if let Some(v) = &self.cursor_column {
116            map.serialize_entry("cursor_column", v)?;
117        }
118        if let Some(v) = &self.row_estimate {
119            map.serialize_entry("row_estimate", v)?;
120        }
121        if let Some(v) = &self.avg_row_bytes {
122            map.serialize_entry("avg_row_bytes", v)?;
123        }
124        if let Some(v) = &self.cursor_min {
125            map.serialize_entry("cursor_min", v)?;
126        }
127        if let Some(v) = &self.cursor_max {
128            map.serialize_entry("cursor_max", v)?;
129        }
130        if let Some(v) = &self.scan_type {
131            map.serialize_entry("scan_type", v)?;
132        }
133        map.serialize_entry("uses_index", &self.uses_index)?;
134        map.serialize_entry("verdict", &self.verdict)?;
135        map.serialize_entry("recommended_profile", &self.recommended_profile)?;
136        map.serialize_entry(
137            "recommended_parallel",
138            &RecommendedParallel {
139                level: self.recommended_parallel.0,
140                reason: self.recommended_parallel.1,
141            },
142        )?;
143        map.serialize_entry("warnings", &self.warnings)?;
144        if let Some(v) = &self.suggestion {
145            map.serialize_entry("suggestion", v)?;
146        }
147        map.serialize_entry(
148            "capabilities",
149            &Capabilities {
150                uses_index: self.uses_index,
151                has_cursor: self.cursor_column.is_some(),
152                can_parallel: self.recommended_parallel.0 > 1,
153            },
154        )?;
155        map.end()
156    }
157}
158
159/// Return the diagnostic for a single export without printing anything.
160///
161/// Used by `rivet plan` to capture preflight data into a `PlanArtifact`.
162pub(crate) fn get_export_diagnostic(
163    config: &Config,
164    export: &ExportConfig,
165) -> Result<ExportDiagnostic> {
166    let url = config.source.resolve_url()?;
167    let tls = config.source.tls.as_ref();
168    crate::source::warn_if_tls_disabled(&config.source);
169    match config.source.source_type {
170        SourceType::Postgres => postgres::diagnose_export_pg(&url, tls, export),
171        SourceType::Mysql => mysql::diagnose_export_mysql(&url, tls, export),
172        SourceType::Mssql => mssql::diagnose_export_mssql(&url, tls, export),
173        SourceType::Mongo => mongo::diagnose_export_mongo(&url, tls, export),
174    }
175}
176
177/// Dedup identity for a destination, shared by `check`'s credential probe
178/// and `doctor`'s write probe. Must include every field that changes where
179/// a probe lands — notably `path`, so two local destinations with different
180/// paths are probed separately. Keeping one helper prevents the two call
181/// sites from drifting apart (doctor's inline copy once omitted `path` and
182/// silently skipped the second local destination).
183fn destination_identity(d: &crate::config::DestinationConfig) -> String {
184    format!(
185        "{:?}:{}:{}:{}",
186        d.destination_type,
187        d.bucket.as_deref().unwrap_or("-"),
188        d.endpoint.as_deref().unwrap_or("-"),
189        d.path.as_deref().unwrap_or("-"),
190    )
191}
192
/// Build the one-line note for the "fail ✗ but rc 0" situation.
///
/// A column rendered `fail ✗` for `--target` does not change the exit code
/// unless `--strict` is passed as well (that gate is by design). Without the
/// note, an operator or CI job reading only the glyph would assume a non-zero
/// exit. The function is pure so the exact wording can be unit-tested.
///
/// `n` is the number of failing columns (it selects singular/plural) and
/// `target_label` is the target name to mention.
fn target_fail_note(n: usize, target_label: &str) -> String {
    let noun = match n {
        1 => "column",
        _ => "columns",
    };
    format!(
        "Note: {n} {noun} FAIL {target_label} compatibility; exit code is gated only with --strict (currently exit 0)"
    )
}
204
205/// Build one [`ExportDiagnostic`] per export via `diagnose`, collecting them (or
206/// short-circuiting on the first error). Single-sources the connect → loop →
207/// return contract every engine's `check_*` shares — only the per-export
208/// `diagnose` call differs — so the deferred print-vs-collect decision lives in
209/// one place rather than triplicated across postgres / mysql / mssql.
210pub(super) fn collect_diagnostics<F>(
211    exports: &[&ExportConfig],
212    mut diagnose: F,
213) -> Result<Vec<ExportDiagnostic>>
214where
215    F: FnMut(&ExportConfig) -> Result<ExportDiagnostic>,
216{
217    exports.iter().map(|&e| diagnose(e)).collect()
218}
219
220pub fn check(
221    config_path: &str,
222    export_name: Option<&str>,
223    params: Option<&std::collections::HashMap<String, String>>,
224    show_type_report: bool,
225    strict: bool,
226    json_output: bool,
227    target: Option<ExportTarget>,
228) -> Result<bool> {
229    let config = Config::load_with_params(config_path, params)?;
230
231    let exports: Vec<&ExportConfig> = if let Some(name) = export_name {
232        let e = config
233            .exports
234            .iter()
235            .find(|e| e.name == name)
236            .ok_or_else(|| anyhow::anyhow!("export '{}' not found in config", name))?;
237        vec![e]
238    } else {
239        config.exports.iter().collect()
240    };
241
242    let url = config.source.resolve_url()?;
243    let tls = config.source.tls.as_ref();
244    // Surface the plaintext-transport warning at preflight time too —
245    // operators should hear it from `rivet check` before they wait
246    // through a full `rivet run` to learn the same thing. `Once` inside
247    // the helper keeps emission to one line per process even when both
248    // `check` and `run` flow through it.
249    crate::source::warn_if_tls_disabled(&config.source);
250    // Each engine connects once and returns one diagnostic per export without
251    // printing. Rendering is decided here: TEXT (the per-export table) for the
252    // default human path, or — under `--json` — the diagnostic is merged into
253    // each export's type-report JSON object below (see the `if show_type_report`
254    // block). `get_export_diagnostic` already proved the diag is computable
255    // without printing; this is the multi-export variant of that path.
256    let diagnostics: Vec<ExportDiagnostic> = match config.source.source_type {
257        SourceType::Postgres => postgres::check_postgres(&url, tls, &exports)?,
258        SourceType::Mysql => mysql::check_mysql(&url, tls, &exports)?,
259        SourceType::Mssql => mssql::check_mssql(&url, tls, &exports)?,
260        SourceType::Mongo => mongo::check_mongo(&url, tls, &exports)?,
261    };
262    if !json_output {
263        for diag in &diagnostics {
264            print_diagnostic(diag);
265        }
266    } else if !show_type_report {
267        // `--json` WITHOUT a type report (the CLI forces `show_type_report` on
268        // under `--json`, so this only fires if `check` is called directly with
269        // `json_output=true, show_type_report=false`): there is no per-export
270        // type-report object to nest the diagnostic into, so emit the diagnostic
271        // alone — still NDJSON, one object per export per line. Keeps the
272        // verdict from being silently dropped regardless of caller.
273        for diag in &diagnostics {
274            println!("{}", serde_json::to_string(diag)?);
275        }
276    }
277    // Under `--json` WITH a type report, the diagnostics are emitted nested
278    // inside each export's type-report object (the `show_type_report` block)
279    // rather than as a standalone array — see the design note there. Built only
280    // on the `--json` path (empty otherwise) since the TEXT path printed above.
281    let diag_by_export: std::collections::HashMap<&str, &ExportDiagnostic> = if json_output {
282        diagnostics
283            .iter()
284            .map(|d| (d.export_name.as_str(), d))
285            .collect()
286    } else {
287        std::collections::HashMap::new()
288    };
289
290    // Destination credential-resolution preflight.  Until 0.7.6 `check` only
291    // probed the source: a config with `AWS_ACCESS_KEY_ID` unset would pass
292    // `rivet check` (rc=0) and then explode on `run`, while `rivet doctor`
293    // caught it.  We don't issue a write-probe here (that is `doctor`'s job
294    // and has side effects) — but we *do* call `create_destination`, which
295    // resolves env vars / credentials_file existence at construction time.
296    // Each unique destination is probed once per `check` to keep multi-export
297    // configs cheap.
298    let mut seen_destinations: std::collections::HashSet<String> = std::collections::HashSet::new();
299    for export in &exports {
300        let dest_key = destination_identity(&export.destination);
301        if !seen_destinations.insert(dest_key) {
302            continue;
303        }
304        let expanded = crate::plan::build::expand_destination_templates(
305            export.destination.clone(),
306            &export.name,
307        );
308        crate::destination::create_destination(&expanded).map_err(|e| {
309            anyhow::anyhow!(
310                "export '{}': destination preflight failed: {:#}",
311                export.name,
312                e
313            )
314        })?;
315    }
316
317    // Whether the check ends clean (no strict failure, no target-fail column).
318    // Stays true for the default `rivet check` (no type report), so the "Next:
319    // rivet run" pointer still fires.
320    let mut clean = true;
321
322    if show_type_report {
323        let policy = if strict {
324            TypePolicy::strict()
325        } else {
326            TypePolicy::warn_only()
327        };
328
329        let mut any_fatal = false;
330        // Count hard target-FAIL columns (and remember which target) so that —
331        // when --strict was NOT passed and the exit code is therefore 0 — we can
332        // print a note. The "fail ✗" glyph in the table implies a hard failure,
333        // but exit is gated only by --strict; without this note an operator or CI
334        // reading the glyph alone would be misled into thinking rc != 0.
335        let mut target_fail_cols = 0usize;
336        let mut target_fail_label: Option<&'static str> = None;
337        for export in &exports {
338            let column_overrides =
339                crate::plan::parse_column_overrides_pub(&export.columns, &export.name)?;
340            // CLI `--target` wins; otherwise fall back to the per-export
341            // `target:` from the config (slice #2a). A declared-but-unknown
342            // target is a loud error — never silently ignored.
343            if let Some(t) = export.target.as_deref()
344                && crate::types::target::ExportTarget::parse(t).is_none()
345            {
346                anyhow::bail!(
347                    "export '{}': unknown target '{t}' (expected: {})",
348                    export.name,
349                    crate::types::target::ExportTarget::valid_target_names()
350                );
351            }
352            let eff_target = target.or_else(|| {
353                export
354                    .target
355                    .as_deref()
356                    .and_then(crate::types::target::ExportTarget::parse)
357            });
358            let config_dir = std::path::Path::new(config_path)
359                .parent()
360                .unwrap_or_else(|| std::path::Path::new("."));
361            match type_report::collect_report(
362                &config,
363                export,
364                &column_overrides,
365                &policy,
366                eff_target,
367                config_dir,
368                params,
369            ) {
370                Ok(report) => {
371                    if report.has_fatal() {
372                        any_fatal = true;
373                    }
374                    if let Some(t) = eff_target
375                        && report.has_target_fail()
376                    {
377                        any_fatal = true;
378                        target_fail_cols += report
379                            .columns
380                            .iter()
381                            .filter(|c| c.target_status == Some(TargetStatus::Fail))
382                            .count();
383                        target_fail_label.get_or_insert(t.label());
384                    }
385                    if json_output {
386                        // `--json` + `--type-report` interaction (DESIGN):
387                        // emit BOTH, nested. Each export gets ONE JSON object
388                        // (NDJSON, one per line, unchanged) keeping the
389                        // top-level type-report keys (`export`/`columns`/
390                        // `violations`) so existing consumers and the
391                        // `check_json_flag_outputs_type_report_as_json` test
392                        // stay green — and we attach the per-export DIAGNOSTIC
393                        // verdict under a new `"diagnostic"` key. This is the
394                        // least-surprising shape because `check --json` already
395                        // emitted one type-report object per export; we simply
396                        // enrich each with its verdict rather than printing a
397                        // second, separate JSON value (which would break a
398                        // single-`from_str` parse of stdout).
399                        print_report_json_with_diagnostic(
400                            &report,
401                            diag_by_export.get(export.name.as_str()).copied(),
402                        )?;
403                    } else {
404                        type_report::print_table(&report, eff_target);
405                    }
406                }
407                Err(e) => {
408                    log::warn!("type report for '{}' failed: {:#}", export.name, e);
409                    // The type report could not be collected, but the diagnostic
410                    // was. Under --json the verdict must still reach the
411                    // consumer, so emit a diagnostic-only object (no `columns`/
412                    // `violations`) rather than silently dropping this export.
413                    if json_output
414                        && let Some(diag) = diag_by_export.get(export.name.as_str()).copied()
415                    {
416                        println!("{}", serde_json::to_string(diag)?);
417                    }
418                }
419            }
420        }
421
422        if strict && any_fatal {
423            anyhow::bail!("strict mode: unsafe type mappings found (see report above)");
424        } else if !strict && target_fail_cols > 0 && !json_output {
425            // The table showed "fail ✗" but rc is 0 — say so explicitly. Skipped
426            // under --json so NDJSON output stays one object per line.
427            clean = false;
428            println!();
429            println!(
430                "{}",
431                target_fail_note(target_fail_cols, target_fail_label.unwrap_or("target"))
432            );
433        }
434    }
435
436    if !json_output {
437        // Verdict legend — decode the EFFICIENT/ACCEPTABLE/DEGRADED/UNSAFE words
438        // printed above and reassure that `check` is advisory: never blocks a run.
439        println!();
440        println!(
441            "Verdicts: EFFICIENT > ACCEPTABLE > DEGRADED > UNSAFE — advisory only; the run is never blocked."
442        );
443        // The "Looks good. Next: …" epilogue is printed by the CALLER
444        // (`dispatch_check`) only after the plan-compatibility gate also passes —
445        // otherwise a config the plan-gate REJECTS printed "Looks good" and then
446        // "Rejected: …" in the same output, a self-contradiction (dogfood MED).
447    }
448
449    // `clean` = the type check surfaced no fatal mapping. The caller ANDs this
450    // with the plan-compatibility gate before printing the success epilogue.
451    Ok(clean)
452}
453
454/// Emit one export's `--json` line: the type report (`export`/`columns`/
455/// `violations`/…) with the per-export DIAGNOSTIC verdict attached under a new
456/// `"diagnostic"` key. NDJSON — exactly one JSON object, terminated by a
457/// newline, so a multi-export config prints one parseable object per line
458/// (preserving the prior `check --json` type-report wire shape, now enriched).
459///
460/// `diag` is `None` only if the diagnostic could not be paired by export name
461/// (it always can in practice); in that case the type report is emitted as
462/// before, so the worst case is a missing `diagnostic` key, never a panic.
463fn print_report_json_with_diagnostic(
464    report: &type_report::ExportTypeReport,
465    diag: Option<&ExportDiagnostic>,
466) -> Result<()> {
467    let mut value = serde_json::to_value(report)?;
468    if let (Some(obj), Some(diag)) = (value.as_object_mut(), diag) {
469        obj.insert("diagnostic".to_string(), serde_json::to_value(diag)?);
470    }
471    println!("{}", serde_json::to_string(&value)?);
472    Ok(())
473}
474
475fn print_diagnostic(diag: &ExportDiagnostic) {
476    println!();
477    println!("Export: {}", diag.export_name);
478    println!("  Strategy:     {}", diag.strategy);
479    println!("  Mode:         {}", diag.mode);
480    if let Some(est) = diag.row_estimate {
481        if est >= 1_000_000 {
482            println!("  Row estimate: ~{}M", est / 1_000_000);
483        } else if est >= 1_000 {
484            println!("  Row estimate: ~{}K", est / 1_000);
485        } else {
486            println!("  Row estimate: ~{}", est);
487        }
488    }
489    if let Some(w) = diag.avg_row_bytes {
490        println!("  Row width:    ~{} bytes", w);
491    }
492    if let (Some(min_v), Some(max_v)) = (&diag.cursor_min, &diag.cursor_max) {
493        println!("  Cursor range: {} .. {}", min_v, max_v);
494    }
495    if let Some(col) = &diag.cursor_column {
496        println!("  Cursor col:   {}", col);
497    }
498    // Plain-language access path instead of a raw EXPLAIN node dump
499    // (`Result (cost=0.00..0.01 rows=1 width=36)`). Keyed off the authoritative
500    // `uses_index` bool, gated on `scan_type.is_some()` so engines without an
501    // EXPLAIN probe (MSSQL) stay silent.
502    if diag.scan_type.is_some() {
503        let access = if diag.uses_index {
504            "index scan (the cursor/chunk column is indexed)"
505        } else {
506            "full table scan (no index on the read path)"
507        };
508        println!("  Access:       {access}");
509    }
510    println!("  Verdict:      {}", diag.verdict);
511    println!(
512        "  Recommended:  tuning.profile: {}",
513        diag.recommended_profile
514    );
515    let (par_level, par_reason) = diag.recommended_parallel;
516    if par_level > 1 {
517        println!("  Recommended:  parallel: {} ({})", par_level, par_reason);
518    } else {
519        println!("  Parallelism:  {} ({})", par_level, par_reason);
520    }
521    for w in &diag.warnings {
522        println!("  Warning:      [{}] {}", w.severity.label(), w.message);
523    }
524    if let Some(suggestion) = &diag.suggestion {
525        println!("  Suggestion:   {}", suggestion);
526    }
527}
528
529#[cfg(test)]
530mod tests {
531    use super::*;
532    use crate::config::{DestinationConfig, DestinationType, ExportConfig, ExportMode, FormatType};
533    use doctor::{
534        categorize_dest_error, categorize_source_error, destination_error_hint, source_error_hint,
535    };
536    use serde_json::Value;
537
538    fn make_export(name: &str, mode: ExportMode, cursor: Option<&str>) -> ExportConfig {
539        // Baseline from the canonical test fixture; override only the fields
540        // these preflight tests vary (mode, cursor, CSV format, query, dest).
541        ExportConfig {
542            mode,
543            cursor_column: cursor.map(|s| s.to_string()),
544            query: Some("SELECT * FROM t".to_string()),
545            format: FormatType::Csv,
546            destination: DestinationConfig {
547                destination_type: DestinationType::Local,
548                path: Some("./out".to_string()),
549                ..Default::default()
550            },
551            ..crate::config::sample_export(name)
552        }
553    }
554
555    /// A representative incremental diagnostic for the `--json` serialization
556    /// tests: a cursor column (so `has_cursor` is true), an index (so
557    /// `uses_index` is true), a >1 parallel recommendation (so `can_parallel`
558    /// is true), and a couple of warnings.
559    fn sample_diagnostic(name: &str) -> ExportDiagnostic {
560        ExportDiagnostic {
561            export_name: name.to_string(),
562            strategy: "incremental(updated_at)".to_string(),
563            mode: "incremental".to_string(),
564            cursor_column: Some("updated_at".to_string()),
565            row_estimate: Some(1_234_567),
566            avg_row_bytes: Some(96),
567            cursor_min: Some("2020-01-01".to_string()),
568            cursor_max: Some("2024-01-01".to_string()),
569            scan_type: Some("Index Scan".to_string()),
570            uses_index: true,
571            verdict: HealthVerdict::Degraded,
572            recommended_profile: "safe",
573            recommended_parallel: (4, "large indexed dataset"),
574            warnings: vec![
575                analysis::Warning::new(analysis::Severity::Medium, "Sparse key range".to_string()),
576                analysis::Warning::new(analysis::Severity::High, "memory risk".to_string()),
577            ],
578            suggestion: Some("create an index".to_string()),
579        }
580    }
581
582    // ── `rivet check --json`: the per-export DIAGNOSTIC verdict as JSON ───────
583
584    #[test]
585    fn diagnostic_json_has_lowercase_verdict_and_core_fields() {
586        let diag = sample_diagnostic("orders");
587        let v: serde_json::Value =
588            serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
589
590        // Verdict serializes to a stable lowercase token (not the SHOUTING
591        // Display form), so CI can match on it case-sensitively.
592        assert_eq!(v["verdict"], "degraded", "got: {v}");
593        assert_eq!(v["strategy"], "incremental(updated_at)", "got: {v}");
594        assert_eq!(v["mode"], "incremental", "got: {v}");
595        assert_eq!(v["recommended_profile"], "safe", "got: {v}");
596        assert!(v["warnings"].is_array(), "warnings must be an array: {v}");
597        assert_eq!(v["warnings"].as_array().unwrap().len(), 2, "got: {v}");
598        // Each warning is a `{ severity, message }` object (per-warning severity).
599        assert_eq!(v["warnings"][0]["severity"], "medium", "got: {v}");
600        assert_eq!(v["warnings"][0]["message"], "Sparse key range", "got: {v}");
601        assert_eq!(v["warnings"][1]["severity"], "high", "got: {v}");
602        assert_eq!(v["export_name"], "orders", "got: {v}");
603    }
604
605    #[test]
606    fn diagnostic_json_verdict_tokens_are_all_lowercase() {
607        for (verdict, token) in [
608            (HealthVerdict::Efficient, "efficient"),
609            (HealthVerdict::Acceptable, "acceptable"),
610            (HealthVerdict::Degraded, "degraded"),
611            (HealthVerdict::Unsafe, "unsafe"),
612        ] {
613            let mut diag = sample_diagnostic("t");
614            diag.verdict = verdict;
615            let v: serde_json::Value =
616                serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
617            assert_eq!(v["verdict"], token, "verdict must lowercase to {token}");
618        }
619    }
620
621    #[test]
622    fn diagnostic_json_recommended_parallel_is_named_object_not_tuple() {
623        // The raw `(u32, &str)` must NOT leak as a positional 2-array; consumers
624        // read `recommended_parallel.level` / `.reason`.
625        let diag = sample_diagnostic("t");
626        let v: serde_json::Value =
627            serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
628        assert!(
629            v["recommended_parallel"].is_object(),
630            "recommended_parallel must be an object, got: {}",
631            v["recommended_parallel"]
632        );
633        assert_eq!(v["recommended_parallel"]["level"], 4, "got: {v}");
634        assert_eq!(
635            v["recommended_parallel"]["reason"], "large indexed dataset",
636            "got: {v}"
637        );
638    }
639
640    #[test]
641    fn diagnostic_json_capabilities_are_derived_from_fields() {
642        let diag = sample_diagnostic("t");
643        let v: serde_json::Value =
644            serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
645        let caps = &v["capabilities"];
646        assert_eq!(caps["uses_index"], true, "got: {caps}");
647        assert_eq!(caps["has_cursor"], true, "got: {caps}");
648        assert_eq!(caps["can_parallel"], true, "got: {caps}");
649    }
650
651    #[test]
652    fn diagnostic_json_capabilities_flip_with_fields() {
653        // A non-cursor, no-index, single-worker diagnostic flips all three.
654        let mut diag = sample_diagnostic("t");
655        diag.cursor_column = None;
656        diag.uses_index = false;
657        diag.recommended_parallel = (1, "small dataset");
658        let v: serde_json::Value =
659            serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
660        let caps = &v["capabilities"];
661        assert_eq!(caps["uses_index"], false, "got: {caps}");
662        assert_eq!(caps["has_cursor"], false, "got: {caps}");
663        assert_eq!(caps["can_parallel"], false, "got: {caps}");
664    }
665
666    #[test]
667    fn diagnostic_json_skips_none_optionals() {
668        // `None` optionals are omitted (not `null`) to keep the object lean.
669        let mut diag = sample_diagnostic("t");
670        diag.suggestion = None;
671        diag.scan_type = None;
672        let v: serde_json::Value =
673            serde_json::from_str(&serde_json::to_string(&diag).unwrap()).unwrap();
674        let obj = v.as_object().unwrap();
675        assert!(!obj.contains_key("suggestion"), "None must be omitted: {v}");
676        assert!(!obj.contains_key("scan_type"), "None must be omitted: {v}");
677    }
678
679    /// Build the same `Value` `print_report_json_with_diagnostic` prints, so the
680    /// merged shape is asserted without capturing stdout.
681    fn merged_check_json(report: &type_report::ExportTypeReport, diag: &ExportDiagnostic) -> Value {
682        let mut value = serde_json::to_value(report).unwrap();
683        value.as_object_mut().unwrap().insert(
684            "diagnostic".to_string(),
685            serde_json::to_value(diag).unwrap(),
686        );
687        value
688    }
689
690    fn empty_report(export: &str) -> type_report::ExportTypeReport {
691        type_report::ExportTypeReport {
692            export: export.to_string(),
693            columns: Vec::new(),
694            violations: Vec::new(),
695            target_failures: false,
696            recovery_sql: None,
697        }
698    }
699
700    #[test]
701    fn check_json_merges_diagnostic_into_type_report_object() {
702        // The `--json` + `--type-report` interaction: ONE object per export
703        // keeping the type-report keys (`export`/`columns`/`violations`) — so
704        // the existing `check_json_flag_outputs_type_report_as_json` contract
705        // holds — PLUS a nested `diagnostic` carrying the verdict.
706        let report = empty_report("orders");
707        let diag = sample_diagnostic("orders");
708        let v = merged_check_json(&report, &diag);
709
710        // Pre-existing type-report keys still at the root.
711        assert_eq!(v["export"], "orders", "got: {v}");
712        assert!(v["columns"].is_array(), "columns at root: {v}");
713        assert!(v["violations"].is_array(), "violations at root: {v}");
714
715        // The diagnostic is nested and carries the verdict + advice.
716        let d = &v["diagnostic"];
717        assert_eq!(d["verdict"], "degraded", "got: {d}");
718        assert_eq!(d["strategy"], "incremental(updated_at)", "got: {d}");
719        assert_eq!(d["mode"], "incremental", "got: {d}");
720        assert_eq!(d["recommended_profile"], "safe", "got: {d}");
721        assert!(d["warnings"].is_array(), "warnings array: {d}");
722        assert_eq!(d["capabilities"]["has_cursor"], true, "got: {d}");
723    }
724
725    #[test]
726    fn check_json_object_is_a_single_parseable_line() {
727        // NDJSON: serializing yields exactly one JSON value with no trailing
728        // data, so `serde_json::from_str(line.trim())` (as the live test does)
729        // parses it whole.
730        let report = empty_report("orders");
731        let diag = sample_diagnostic("orders");
732        let line = serde_json::to_string(&merged_check_json(&report, &diag)).unwrap();
733        assert!(!line.contains('\n'), "one object per line: {line}");
734        let parsed: Value = serde_json::from_str(line.trim()).expect("must parse whole");
735        assert_eq!(parsed["export"], "orders");
736    }
737
738    // ── L8: 'fail ✗' note when --target FAILs but --strict was not passed ─────
739    // The glyph implies a hard failure; exit is gated only by --strict. The note
740    // tells an operator/CI the exit is 0 so the glyph doesn't mislead.
741    #[test]
742    fn target_fail_note_names_count_target_and_strict_gate() {
743        let note = target_fail_note(2, "bigquery");
744        assert!(note.contains("2 columns FAIL"), "got: {note}");
745        assert!(note.contains("bigquery"), "got: {note}");
746        assert!(note.contains("--strict"), "got: {note}");
747        assert!(note.contains("exit 0"), "got: {note}");
748    }
749
750    #[test]
751    fn target_fail_note_singular_for_one_column() {
752        let note = target_fail_note(1, "duckdb");
753        assert!(note.contains("1 column FAIL"), "got: {note}");
754        assert!(!note.contains("1 columns"), "should be singular: {note}");
755    }
756
757    #[test]
758    fn verdict_small_indexed_with_cursor_is_efficient() {
759        let v = compute_verdict(Some(500_000), true, true, None, 1);
760        assert!(matches!(v, HealthVerdict::Efficient), "got: {v}");
761    }
762
763    #[test]
764    fn verdict_large_indexed_with_cursor_is_acceptable() {
765        let v = compute_verdict(Some(20_000_000), true, true, None, 1);
766        assert!(matches!(v, HealthVerdict::Acceptable), "got: {v}");
767    }
768
769    #[test]
770    fn verdict_no_index_no_cursor_is_degraded() {
771        let v = compute_verdict(Some(500_000), false, false, None, 1);
772        assert!(matches!(v, HealthVerdict::Degraded), "got: {v}");
773    }
774
775    #[test]
776    fn verdict_huge_no_index_is_unsafe() {
777        let v = compute_verdict(Some(100_000_000), false, false, None, 1);
778        assert!(matches!(v, HealthVerdict::Unsafe), "got: {v}");
779    }
780
781    #[test]
782    fn parse_pg_row_estimate_from_sort_plan() {
783        let plan = "Sort  (cost=12345.67..12456.78 rows=1000455 width=50)\n  ->  Seq Scan on orders  (cost=0.00..8765.43 rows=1000455 width=50)";
784        assert_eq!(parse_pg_row_estimate(plan), Some(1_000_455));
785    }
786
787    #[test]
788    fn parse_pg_row_estimate_from_index_scan() {
789        let plan =
790            "Index Scan using idx_updated on orders  (cost=0.42..81676.36 rows=500000 width=50)";
791        assert_eq!(parse_pg_row_estimate(plan), Some(500_000));
792    }
793
794    #[test]
795    fn extract_scan_type_detects_seq_scan() {
796        let plan = "Sort  (cost=...)\n  ->  Seq Scan on users  (cost=...)";
797        let st = extract_scan_type(plan);
798        assert!(st.contains("Seq Scan"), "expected Seq Scan, got: {st}");
799    }
800
801    #[test]
802    fn extract_scan_type_detects_index_scan() {
803        let plan = "Index Scan using users_pkey on users  (cost=0.42..123.45 rows=100 width=50)";
804        let st = extract_scan_type(plan);
805        assert!(st.contains("Index Scan"), "expected Index Scan, got: {st}");
806    }
807
808    #[test]
809    fn suggestion_for_efficient_verdict_is_none() {
810        let e = make_export("t", ExportMode::Full, None);
811        let s = build_suggestion(&HealthVerdict::Efficient, Some(1000), true, &e);
812        assert!(
813            s.is_none(),
814            "efficient verdict should produce no suggestion"
815        );
816    }
817
818    #[test]
819    fn suggestion_for_degraded_verdict_recommends_safe_profile() {
820        let e = make_export("t", ExportMode::Full, None);
821        let s = build_suggestion(&HealthVerdict::Degraded, Some(500_000), false, &e);
822        let msg = s.expect("degraded verdict should produce a suggestion");
823        assert!(
824            msg.contains("safe"),
825            "suggestion should recommend safe profile, got: {msg}"
826        );
827    }
828
829    fn src_err(msg: &str) -> &'static str {
830        categorize_source_error(&anyhow::anyhow!("{}", msg))
831    }
832
833    #[test]
834    fn source_password_rejected_is_auth_error() {
835        assert_eq!(
836            src_err("password authentication failed for user \"rivet\""),
837            "auth error"
838        );
839    }
840
841    #[test]
842    fn source_authentication_failed_is_auth_error() {
843        assert_eq!(src_err("FATAL: authentication failed"), "auth error");
844    }
845
846    #[test]
847    fn source_access_denied_is_auth_error() {
848        assert_eq!(
849            src_err("Access denied for user 'rivet'@'localhost'"),
850            "auth error"
851        );
852    }
853
854    #[test]
855    fn source_connection_refused_is_connectivity() {
856        assert_eq!(
857            src_err("connection refused (os error 61)"),
858            "connectivity error"
859        );
860    }
861
862    #[test]
863    fn source_timed_out_is_connectivity() {
864        assert_eq!(src_err("connection timed out"), "connectivity error");
865    }
866
867    #[test]
868    fn source_dns_translate_host_is_connectivity() {
869        assert_eq!(
870            src_err("could not translate host name \"db.bad\" to address"),
871            "connectivity error"
872        );
873    }
874
875    #[test]
876    fn source_name_not_known_is_connectivity() {
877        assert_eq!(src_err("Name or service not known"), "connectivity error");
878    }
879
880    #[test]
881    fn source_unknown_error_is_generic() {
882        assert_eq!(src_err("something totally unexpected"), "error");
883    }
884
885    fn dest_config(dtype: DestinationType) -> DestinationConfig {
886        DestinationConfig {
887            destination_type: dtype,
888            bucket: Some("b".to_string()),
889            ..Default::default()
890        }
891    }
892
893    fn dest_err(msg: &str, dtype: DestinationType) -> &'static str {
894        let cfg = dest_config(dtype);
895        categorize_dest_error(&anyhow::anyhow!("{}", msg), &cfg)
896    }
897
898    fn local_dest(path: &str) -> DestinationConfig {
899        DestinationConfig {
900            destination_type: DestinationType::Local,
901            path: Some(path.to_string()),
902            ..Default::default()
903        }
904    }
905
906    // Regression (doctor-dedup): doctor's inline dedup key omitted `path`,
907    // so two local destinations with different paths collapsed to one entry
908    // and the second was never write-probed. The shared identity must keep
909    // them distinct.
910    #[test]
911    fn destination_identity_distinguishes_local_paths() {
912        assert_ne!(
913            destination_identity(&local_dest("/tmp/a")),
914            destination_identity(&local_dest("/tmp/b")),
915        );
916    }
917
918    #[test]
919    fn destination_identity_collapses_identical_local_destinations() {
920        assert_eq!(
921            destination_identity(&local_dest("/tmp/a")),
922            destination_identity(&local_dest("/tmp/a")),
923        );
924    }
925
926    #[test]
927    fn destination_identity_distinguishes_buckets() {
928        let a = DestinationConfig {
929            bucket: Some("bucket-a".to_string()),
930            ..dest_config(DestinationType::S3)
931        };
932        let b = DestinationConfig {
933            bucket: Some("bucket-b".to_string()),
934            ..dest_config(DestinationType::S3)
935        };
936        assert_ne!(destination_identity(&a), destination_identity(&b));
937    }
938
939    // Same bucket name on different endpoints (e.g. AWS vs MinIO) is two
940    // distinct destinations and must be probed separately.
941    #[test]
942    fn destination_identity_distinguishes_endpoints_for_same_bucket() {
943        let aws = dest_config(DestinationType::S3);
944        let minio = DestinationConfig {
945            endpoint: Some("http://localhost:9000".to_string()),
946            ..dest_config(DestinationType::S3)
947        };
948        assert_ne!(destination_identity(&aws), destination_identity(&minio));
949    }
950
951    #[test]
952    fn dest_credential_loading_is_auth_error() {
953        assert_eq!(
954            dest_err(
955                "loading credential to sign http request",
956                DestinationType::Gcs
957            ),
958            "auth error"
959        );
960    }
961
962    #[test]
963    fn dest_permission_denied_is_auth_error() {
964        assert_eq!(
965            dest_err("permission denied on resource bucket", DestinationType::S3),
966            "auth error"
967        );
968    }
969
970    #[test]
971    fn dest_forbidden_is_auth_error() {
972        assert_eq!(
973            dest_err("403 Forbidden", DestinationType::Gcs),
974            "auth error"
975        );
976    }
977
978    #[test]
979    fn dest_unauthorized_is_auth_error() {
980        assert_eq!(
981            dest_err("401 Unauthorized", DestinationType::S3),
982            "auth error"
983        );
984    }
985
986    #[test]
987    fn dest_invalid_grant_is_auth_error() {
988        assert_eq!(
989            dest_err(
990                "invalid_grant: token has been revoked",
991                DestinationType::Gcs
992            ),
993            "auth error"
994        );
995    }
996
997    #[test]
998    fn dest_nosuchbucket_s3_is_bucket_not_found() {
999        assert_eq!(
1000            dest_err(
1001                "NoSuchBucket: the specified bucket does not exist",
1002                DestinationType::S3
1003            ),
1004            "bucket not found"
1005        );
1006    }
1007
1008    #[test]
1009    fn dest_not_found_gcs_is_bucket_not_found() {
1010        assert_eq!(
1011            dest_err("bucket not found (404)", DestinationType::Gcs),
1012            "bucket not found"
1013        );
1014    }
1015
1016    #[test]
1017    fn dest_not_found_local_is_path_not_found() {
1018        assert_eq!(
1019            dest_err("path not found: /tmp/missing", DestinationType::Local),
1020            "path not found"
1021        );
1022    }
1023
1024    #[test]
1025    fn dest_connection_refused_is_connectivity() {
1026        assert_eq!(
1027            dest_err("connection refused to endpoint", DestinationType::S3),
1028            "connectivity error"
1029        );
1030    }
1031
1032    #[test]
1033    fn dest_dns_error_is_connectivity() {
1034        assert_eq!(
1035            dest_err("dns error: failed to lookup address", DestinationType::S3),
1036            "connectivity error"
1037        );
1038    }
1039
1040    #[test]
1041    fn dest_timed_out_is_connectivity() {
1042        assert_eq!(
1043            dest_err("request timed out after 30s", DestinationType::Gcs),
1044            "connectivity error"
1045        );
1046    }
1047
1048    #[test]
1049    fn dest_unknown_error_is_generic() {
1050        assert_eq!(
1051            dest_err("something else entirely", DestinationType::S3),
1052            "error"
1053        );
1054    }
1055
1056    #[test]
1057    fn strategy_full_scan() {
1058        let e = make_export("t", ExportMode::Full, None);
1059        assert_eq!(derive_strategy(&e), "full-scan");
1060    }
1061
1062    #[test]
1063    fn strategy_full_parallel() {
1064        let mut e = make_export("t", ExportMode::Full, None);
1065        e.parallel = 4;
1066        assert_eq!(derive_strategy(&e), "full-parallel(4)");
1067    }
1068
1069    #[test]
1070    fn strategy_incremental() {
1071        let e = make_export("t", ExportMode::Incremental, Some("updated_at"));
1072        assert_eq!(derive_strategy(&e), "incremental(updated_at)");
1073    }
1074
1075    #[test]
1076    fn strategy_chunked() {
1077        let mut e = make_export("t", ExportMode::Chunked, None);
1078        e.chunk_column = Some("id".to_string());
1079        e.chunk_size = 50_000;
1080        assert_eq!(derive_strategy(&e), "chunked(id, size=50000)");
1081    }
1082
1083    #[test]
1084    fn strategy_chunked_parallel() {
1085        let mut e = make_export("t", ExportMode::Chunked, None);
1086        e.chunk_column = Some("id".to_string());
1087        e.chunk_size = 50_000;
1088        e.parallel = 3;
1089        assert_eq!(derive_strategy(&e), "chunked-parallel(id, size=50000, p=3)");
1090    }
1091
1092    #[test]
1093    fn strategy_time_window() {
1094        let mut e = make_export("t", ExportMode::TimeWindow, None);
1095        e.time_column = Some("created_at".to_string());
1096        e.days_window = Some(7);
1097        assert_eq!(derive_strategy(&e), "time-window(created_at, 7d)");
1098    }
1099
1100    #[test]
1101    fn profile_small_indexed_is_fast() {
1102        let e = make_export("t", ExportMode::Full, None);
1103        assert_eq!(recommend_profile(Some(500_000), true, &e), "fast");
1104    }
1105
1106    #[test]
1107    fn profile_medium_indexed_is_balanced() {
1108        let e = make_export("t", ExportMode::Full, None);
1109        assert_eq!(recommend_profile(Some(5_000_000), true, &e), "balanced");
1110    }
1111
1112    #[test]
1113    fn profile_large_indexed_is_safe() {
1114        let e = make_export("t", ExportMode::Full, None);
1115        assert_eq!(recommend_profile(Some(50_000_000), true, &e), "safe");
1116    }
1117
1118    #[test]
1119    fn profile_small_no_index_is_balanced() {
1120        let e = make_export("t", ExportMode::Full, None);
1121        assert_eq!(recommend_profile(Some(50_000), false, &e), "balanced");
1122    }
1123
1124    #[test]
1125    fn profile_small_no_index_parallel_is_safe() {
1126        let mut e = make_export("t", ExportMode::Full, None);
1127        e.parallel = 4;
1128        assert_eq!(recommend_profile(Some(50_000), false, &e), "safe");
1129    }
1130
1131    #[test]
1132    fn profile_medium_no_index_is_balanced() {
1133        let e = make_export("t", ExportMode::Full, None);
1134        assert_eq!(recommend_profile(Some(500_000), false, &e), "balanced");
1135    }
1136
1137    #[test]
1138    fn profile_large_no_index_is_safe() {
1139        let e = make_export("t", ExportMode::Full, None);
1140        assert_eq!(recommend_profile(Some(5_000_000), false, &e), "safe");
1141    }
1142
1143    #[test]
1144    fn sparse_range_warning_when_very_sparse() {
1145        let mut e = make_export("t", ExportMode::Chunked, None);
1146        e.chunk_column = Some("id".to_string());
1147        e.chunk_size = 100_000;
1148        let w = check_sparse_range(&e, Some(100_000), Some("1"), Some("10000000"));
1149        assert!(w.is_some(), "should warn about sparse range");
1150        let msg = w.unwrap();
1151        assert!(msg.contains("Sparse key range"), "got: {msg}");
1152        assert!(msg.contains("empty"), "got: {msg}");
1153    }
1154
1155    #[test]
1156    fn sparse_range_no_warning_when_dense() {
1157        let mut e = make_export("t", ExportMode::Chunked, None);
1158        e.chunk_column = Some("id".to_string());
1159        e.chunk_size = 100_000;
1160        let w = check_sparse_range(&e, Some(100_000), Some("1"), Some("100000"));
1161        assert!(w.is_none(), "should not warn for dense range");
1162    }
1163
1164    #[test]
1165    fn sparse_range_skipped_when_chunk_dense() {
1166        let mut e = make_export("t", ExportMode::Chunked, None);
1167        e.chunk_column = Some("id".to_string());
1168        e.chunk_dense = true;
1169        e.chunk_size = 100_000;
1170        let w = check_sparse_range(&e, Some(100_000), Some("1"), Some("10000000"));
1171        assert!(
1172            w.is_none(),
1173            "chunk_dense uses ordinals, not physical id span"
1174        );
1175    }
1176
1177    #[test]
1178    fn dense_surrogate_warning_when_chunk_dense_builtin() {
1179        let mut e = make_export("t", ExportMode::Chunked, None);
1180        e.chunk_column = Some("id".to_string());
1181        e.chunk_dense = true;
1182        e.query = Some("SELECT id FROM orders".to_string());
1183        let w = check_dense_surrogate_cost(&e);
1184        assert!(w.is_some(), "should warn about built-in ROW_NUMBER cost");
1185        assert!(w.unwrap().contains("global sort"));
1186    }
1187
1188    #[test]
1189    fn sparse_range_not_triggered_for_non_chunked() {
1190        let e = make_export("t", ExportMode::Full, None);
1191        let w = check_sparse_range(&e, Some(100), Some("1"), Some("1000000"));
1192        assert!(w.is_none(), "should not warn for non-chunked mode");
1193    }
1194
1195    #[test]
1196    fn dense_surrogate_warning_with_row_number() {
1197        let mut e = make_export("t", ExportMode::Chunked, None);
1198        e.chunk_column = Some("rn".to_string());
1199        e.query = Some("SELECT *, ROW_NUMBER() OVER (ORDER BY id) AS rn FROM orders".to_string());
1200        let w = check_dense_surrogate_cost(&e);
1201        assert!(w.is_some(), "should warn about ROW_NUMBER cost");
1202        assert!(w.unwrap().contains("global sort"));
1203    }
1204
1205    #[test]
1206    fn no_dense_surrogate_warning_without_row_number() {
1207        let mut e = make_export("t", ExportMode::Chunked, None);
1208        e.chunk_column = Some("id".to_string());
1209        e.query = Some("SELECT * FROM orders".to_string());
1210        let w = check_dense_surrogate_cost(&e);
1211        assert!(w.is_none());
1212    }
1213
1214    #[test]
1215    fn no_dense_surrogate_warning_for_non_chunked() {
1216        let mut e = make_export("t", ExportMode::Full, None);
1217        e.query = Some("SELECT ROW_NUMBER() OVER () AS rn FROM t".to_string());
1218        let w = check_dense_surrogate_cost(&e);
1219        assert!(w.is_none(), "should not warn for non-chunked mode");
1220    }
1221
1222    #[test]
1223    fn parallel_memory_warning_large_dataset() {
1224        let mut e = make_export("t", ExportMode::Chunked, None);
1225        e.parallel = 4;
1226        let w = check_parallel_memory_risk(&e, Some(10_000_000));
1227        assert!(w.is_some(), "should warn about memory risk");
1228        let msg = w.unwrap();
1229        assert!(msg.contains("Parallel=4"), "got: {msg}");
1230        assert!(msg.contains("memory"), "got: {msg}");
1231    }
1232
1233    #[test]
1234    fn no_parallel_memory_warning_small_dataset() {
1235        let mut e = make_export("t", ExportMode::Chunked, None);
1236        e.parallel = 4;
1237        let w = check_parallel_memory_risk(&e, Some(1_000));
1238        assert!(w.is_none(), "should not warn for small dataset");
1239    }
1240
1241    #[test]
1242    fn no_parallel_memory_warning_single_worker() {
1243        let e = make_export("t", ExportMode::Full, None);
1244        let w = check_parallel_memory_risk(&e, Some(100_000_000));
1245        assert!(w.is_none(), "should not warn when parallel=1");
1246    }
1247
1248    #[test]
1249    fn suggestion_degraded_full_recommends_incremental() {
1250        let e = make_export("t", ExportMode::Full, None);
1251        let s = build_suggestion(&HealthVerdict::Degraded, Some(500_000), false, &e).unwrap();
1252        assert!(s.contains("incremental"), "got: {s}");
1253    }
1254
1255    #[test]
1256    fn suggestion_degraded_chunked_recommends_index() {
1257        let mut e = make_export("t", ExportMode::Chunked, None);
1258        e.chunk_column = Some("id".to_string());
1259        let s = build_suggestion(&HealthVerdict::Degraded, Some(500_000), false, &e).unwrap();
1260        assert!(s.contains("index on 'id'"), "got: {s}");
1261    }
1262
1263    #[test]
1264    fn suggestion_degraded_time_window_recommends_index() {
1265        let mut e = make_export("t", ExportMode::TimeWindow, None);
1266        e.time_column = Some("created_at".to_string());
1267        e.days_window = Some(7);
1268        let s = build_suggestion(&HealthVerdict::Degraded, Some(500_000), false, &e).unwrap();
1269        assert!(s.contains("index on 'created_at'"), "got: {s}");
1270    }
1271
1272    #[test]
1273    fn suggestion_unsafe_full_recommends_incremental() {
1274        let e = make_export("t", ExportMode::Full, None);
1275        let s = build_suggestion(&HealthVerdict::Unsafe, Some(100_000_000), false, &e).unwrap();
1276        assert!(s.contains("incremental"), "got: {s}");
1277    }
1278
1279    #[test]
1280    fn suggestion_unsafe_chunked_recommends_index_and_parallel() {
1281        let mut e = make_export("t", ExportMode::Chunked, None);
1282        e.chunk_column = Some("id".to_string());
1283        let s = build_suggestion(&HealthVerdict::Unsafe, Some(100_000_000), false, &e).unwrap();
1284        assert!(s.contains("index on 'id'"), "got: {s}");
1285        assert!(s.contains("parallel"), "got: {s}");
1286    }
1287
1288    #[test]
1289    fn suggestion_unsafe_incremental_recommends_index_on_cursor() {
1290        let e = make_export("t", ExportMode::Incremental, Some("updated_at"));
1291        let s = build_suggestion(&HealthVerdict::Unsafe, Some(100_000_000), false, &e).unwrap();
1292        assert!(s.contains("index on 'updated_at'"), "got: {s}");
1293    }
1294
1295    #[test]
1296    fn suggestion_acceptable_large_full_recommends_incremental() {
1297        let e = make_export("t", ExportMode::Full, None);
1298        let s = build_suggestion(&HealthVerdict::Acceptable, Some(20_000_000), true, &e).unwrap();
1299        assert!(s.contains("incremental"), "got: {s}");
1300    }
1301
1302    #[test]
1303    fn parallel_only_for_chunked_mode() {
1304        let e = make_export("t", ExportMode::Full, None);
1305        let (level, _) = recommend_parallelism(&e, Some(1_000_000), true);
1306        assert_eq!(level, 1, "non-chunked mode should recommend 1");
1307    }
1308
1309    #[test]
1310    fn parallel_small_dataset_is_one() {
1311        let mut e = make_export("t", ExportMode::Chunked, None);
1312        e.chunk_column = Some("id".to_string());
1313        let (level, _) = recommend_parallelism(&e, Some(10_000), true);
1314        assert_eq!(level, 1, "small dataset should recommend 1");
1315    }
1316
1317    #[test]
1318    fn parallel_moderate_indexed_is_two() {
1319        let mut e = make_export("t", ExportMode::Chunked, None);
1320        e.chunk_column = Some("id".to_string());
1321        let (level, _) = recommend_parallelism(&e, Some(200_000), true);
1322        assert_eq!(level, 2, "moderate indexed dataset should recommend 2");
1323    }
1324
1325    #[test]
1326    fn parallel_large_indexed_is_four() {
1327        let mut e = make_export("t", ExportMode::Chunked, None);
1328        e.chunk_column = Some("id".to_string());
1329        let (level, _) = recommend_parallelism(&e, Some(2_000_000), true);
1330        assert_eq!(level, 4, "large indexed dataset should recommend 4");
1331    }
1332
1333    #[test]
1334    fn parallel_no_index_large_is_one() {
1335        let mut e = make_export("t", ExportMode::Chunked, None);
1336        e.chunk_column = Some("id".to_string());
1337        let (level, reason) = recommend_parallelism(&e, Some(10_000_000), false);
1338        assert_eq!(level, 1, "no index + large should recommend 1");
1339        assert!(reason.contains("no index"), "got: {reason}");
1340    }
1341
1342    #[test]
1343    fn parallel_no_index_moderate_is_conservative() {
1344        let mut e = make_export("t", ExportMode::Chunked, None);
1345        e.chunk_column = Some("id".to_string());
1346        let (level, _) = recommend_parallelism(&e, Some(200_000), false);
1347        assert_eq!(
1348            level, 2,
1349            "no index + moderate should recommend 2 (conservative)"
1350        );
1351    }
1352
1353    #[test]
1354    fn suggestion_acceptable_large_chunked_recommends_parallel() {
1355        let mut e = make_export("t", ExportMode::Chunked, None);
1356        e.chunk_column = Some("id".to_string());
1357        let s = build_suggestion(&HealthVerdict::Acceptable, Some(20_000_000), true, &e).unwrap();
1358        assert!(s.contains("parallel"), "got: {s}");
1359    }
1360
1361    #[test]
1362    fn connection_limit_warn_when_parallel_meets_max() {
1363        let w = check_connection_limit(20, Some(20));
1364        assert!(w.is_some(), "should warn when parallel == max_connections");
1365        let msg = w.unwrap();
1366        assert!(msg.contains("max_connections=20"), "got: {msg}");
1367        assert!(msg.contains("parallel=20"), "got: {msg}");
1368    }
1369
1370    #[test]
1371    fn connection_limit_warn_when_parallel_exceeds_max() {
1372        let w = check_connection_limit(100, Some(20));
1373        assert!(w.is_some(), "should warn when parallel > max_connections");
1374        let msg = w.unwrap();
1375        assert!(msg.contains("max_connections=20"), "got: {msg}");
1376    }
1377
1378    #[test]
1379    fn connection_limit_no_warn_when_parallel_below_max() {
1380        let w = check_connection_limit(4, Some(100));
1381        assert!(
1382            w.is_none(),
1383            "should not warn when parallel << max_connections"
1384        );
1385    }
1386
1387    #[test]
1388    fn connection_limit_no_warn_when_parallel_is_one() {
1389        let w = check_connection_limit(1, Some(5));
1390        assert!(
1391            w.is_none(),
1392            "single worker never triggers connection warning"
1393        );
1394    }
1395
1396    #[test]
1397    fn connection_limit_skipped_note_when_max_unknown_and_parallel_gt_one() {
1398        let w = check_connection_limit(100, None);
1399        assert!(w.is_some(), "should note that check was skipped");
1400        let msg = w.unwrap();
1401        assert!(msg.contains("skipped"), "got: {msg}");
1402    }
1403
1404    #[test]
1405    fn connection_limit_no_note_when_max_unknown_and_parallel_is_one() {
1406        let w = check_connection_limit(1, None);
1407        assert!(
1408            w.is_none(),
1409            "single worker never triggers connection warning"
1410        );
1411    }
1412
1413    #[test]
1414    fn connection_limit_suggests_headroom() {
1415        let w = check_connection_limit(25, Some(20)).unwrap();
1416        // Suggested safe max should be max_connections - 3 = 17
1417        assert!(
1418            w.contains("17"),
1419            "should suggest leaving headroom, got: {w}"
1420        );
1421    }
1422
1423    // ── v0.7.4: actionable hints next to categorised errors ───────────
1424
1425    fn src_hint(msg: &str, st: SourceType) -> Option<&'static str> {
1426        let err = anyhow::anyhow!("{}", msg);
1427        let cat = categorize_source_error(&err);
1428        source_error_hint(cat, &err, &st)
1429    }
1430
1431    fn dest_hint(msg: &str, dt: DestinationType) -> Option<&'static str> {
1432        let err = anyhow::anyhow!("{}", msg);
1433        let dest = DestinationConfig {
1434            destination_type: dt,
1435            bucket: Some("b".into()),
1436            ..Default::default()
1437        };
1438        let cat = categorize_dest_error(&err, &dest);
1439        destination_error_hint(cat, &dest)
1440    }
1441
1442    #[test]
1443    fn source_tls_handshake_returns_pg_specific_tls_hint() {
1444        let h = src_hint("TLS handshake failed", SourceType::Postgres).expect("hint");
1445        assert!(h.contains("tls.mode") && h.contains("ca_file"), "got: {h}");
1446    }
1447
1448    #[test]
1449    fn source_tls_handshake_returns_mysql_specific_tls_hint() {
1450        let h = src_hint("certificate verify failed", SourceType::Mysql).expect("hint");
1451        assert!(h.contains("tls.mode"), "got: {h}");
1452    }
1453
1454    #[test]
1455    fn source_auth_error_postgres_mentions_pg_hba() {
1456        let h = src_hint("password authentication failed", SourceType::Postgres).expect("hint");
1457        assert!(h.contains("pg_hba") && h.contains("SELECT"), "got: {h}");
1458    }
1459
1460    #[test]
1461    fn source_auth_error_mysql_mentions_grant() {
1462        let h = src_hint(
1463            "Access denied for user 'rivet'@'localhost'",
1464            SourceType::Mysql,
1465        )
1466        .expect("hint");
1467        assert!(h.contains("GRANT") && h.contains("FLUSH"), "got: {h}");
1468    }
1469
1470    #[test]
1471    fn source_connectivity_error_mentions_bastion_and_network() {
1472        let h = src_hint("connection refused", SourceType::Postgres).expect("hint");
1473        assert!(h.contains("bastion") || h.contains("VPN"), "got: {h}");
1474    }
1475
1476    #[test]
1477    fn source_unknown_error_returns_no_hint() {
1478        // Generic "error" category should yield no hint — better to
1479        // print the raw driver message than to mislead.
1480        let h = src_hint("totally unexpected", SourceType::Postgres);
1481        assert!(h.is_none(), "unknown errors should not produce a hint");
1482    }
1483
1484    #[test]
1485    fn dest_s3_auth_error_names_concrete_actions() {
1486        let h = dest_hint("permission denied", DestinationType::S3).expect("hint");
1487        assert!(
1488            h.contains("s3:PutObject") && h.contains("cloud-permissions"),
1489            "got: {h}"
1490        );
1491    }
1492
1493    #[test]
1494    fn dest_gcs_auth_error_names_concrete_actions() {
1495        let h = dest_hint("403 Forbidden", DestinationType::Gcs).expect("hint");
1496        assert!(
1497            h.contains("storage.objects") && h.contains("cloud-permissions"),
1498            "got: {h}"
1499        );
1500    }
1501
1502    #[test]
1503    fn categorize_dest_error_sas_expired_message_returns_sas_expired_category() {
1504        // Guard the load-bearing ordering in categorize_dest_error: the
1505        // "sas expired" early-return must fire before the generic "token"
1506        // branch, or destination_error_hint produces the wrong hint.
1507        // This test pins the *category string*, not just the final hint text.
1508        let err = anyhow::anyhow!(
1509            "Azure SAS token already expired (se=2024-01-01T00:00:00Z). Generate a new SAS and re-export."
1510        );
1511        let dest = DestinationConfig {
1512            destination_type: DestinationType::Azure,
1513            bucket: Some("c".into()),
1514            ..Default::default()
1515        };
1516        let cat = categorize_dest_error(&err, &dest);
1517        assert_eq!(
1518            cat, "sas expired",
1519            "expired-SAS error must categorise as 'sas expired', not '{cat}' — ordering in categorize_dest_error is load-bearing"
1520        );
1521    }
1522
1523    #[test]
1524    fn dest_azure_sas_expired_returns_regenerate_hint() {
1525        // The Azure preflight (v0.7.4) bails with "expired (se=…)" —
1526        // the hint must steer the operator to `az storage container
1527        // generate-sas` not "your IAM role is broken".
1528        let h = dest_hint(
1529            "Azure SAS token already expired (se=2024-01-01T00:00:00Z)",
1530            DestinationType::Azure,
1531        )
1532        .expect("hint");
1533        assert!(
1534            h.contains("generate-sas") && h.contains("AZURE_STORAGE_SAS_TOKEN"),
1535            "got: {h}"
1536        );
1537    }
1538
1539    #[test]
1540    fn dest_s3_bucket_not_found_says_no_auto_create() {
1541        let h = dest_hint("NoSuchBucket", DestinationType::S3).expect("hint");
1542        assert!(
1543            h.contains("does NOT auto-create") && h.contains("aws s3 mb"),
1544            "got: {h}"
1545        );
1546    }
1547
1548    #[test]
1549    fn dest_s3_connectivity_error_warns_about_region_mismatch() {
1550        let h = dest_hint("dns error", DestinationType::S3).expect("hint");
1551        assert!(h.contains("region") || h.contains("endpoint"), "got: {h}");
1552    }
1553}
rivet/preflight/mod.rs

rivet/preflight/
mod.rs