Skip to main content

rivet/preflight/
type_report.rs

//! `rivet check --type-report` — tabular and JSON output.
//!
//! Roadmap §9 ("Type Fidelity Report") and §16 ("BigQuery Compatibility Layer").
//! Renders a `Vec<TypeMapping>` plus any `PolicyViolation`s as either a
//! fixed-width terminal table or newline-delimited JSON.
6
7use serde::Serialize;
8
9use crate::config::{Config, ExportConfig, FormatType, SourceType};
10use crate::error::Result;
11use crate::source;
12use crate::types::{
13    ColumnOverrides, TypeFidelity,
14    policy::{PolicyAction, PolicyViolation, TypePolicy},
15    target::{ExportTarget, TargetInput, TargetStatus},
16};
17
18/// One row in the type report (and the JSON output — roadmap §9).
19#[derive(Serialize)]
20pub struct TypeReportRow {
21    pub column: String,
22    pub source_type: String,
23    pub rivet_type: String,
24    pub arrow_type: String,
25    pub fidelity: TypeFidelity,
26    #[serde(skip_serializing_if = "Vec::is_empty")]
27    pub warnings: Vec<String>,
28    /// Present when `--target` is set.
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub target_type: Option<String>,
31    #[serde(skip_serializing_if = "Option::is_none")]
32    pub target_status: Option<TargetStatus>,
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub target_note: Option<String>,
35    /// Type a generic Parquet reader infers without a declared schema, surfaced
36    /// only when it diverges from `target_type` (e.g. BigQuery autoloads JSON
37    /// as BYTES). Present when `--target` is set and autoload ≠ native.
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub autoload_type: Option<String>,
40    /// Materialization / load-schema hint (L5) to recover the native type.
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub cast_sql: Option<String>,
43}
44
45/// One export's type-report data.
46#[derive(Serialize)]
47pub struct ExportTypeReport {
48    pub export: String,
49    pub columns: Vec<TypeReportRow>,
50    pub violations: Vec<PolicyViolation>,
51    /// True when any column failed target-compatibility.
52    #[serde(skip_serializing_if = "std::ops::Not::not")]
53    pub target_failures: bool,
54    /// Target-native recovery SQL (ADR-0014 L5): a post-load transform that
55    /// recovers types bare autoload degrades (BigQuery JSON/UUID/DATETIME).
56    /// `None` for targets that autoload faithfully (DuckDB) or when no target
57    /// is set.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub recovery_sql: Option<String>,
60}
61
62impl ExportTypeReport {
63    pub fn has_fatal(&self) -> bool {
64        self.violations.iter().any(|v| v.fatal)
65    }
66
67    pub fn has_target_fail(&self) -> bool {
68        self.target_failures
69    }
70}
71
72/// Collect type mappings for one export from a live connection.
73pub fn collect_report(
74    config: &Config,
75    export: &ExportConfig,
76    column_overrides: &ColumnOverrides,
77    policy: &TypePolicy,
78    target: Option<ExportTarget>,
79    config_dir: &std::path::Path,
80    params: Option<&std::collections::HashMap<String, String>>,
81) -> Result<ExportTypeReport> {
82    let url = config.source.resolve_url()?;
83    let tls = config.source.tls.as_ref();
84    // Resolve the effective query the same way the export pipeline does, so the
85    // `table:` shortcut (and `query_file:` / `${var}` params) produce a real
86    // query instead of an empty string.
87    let query = export.resolve_query(config_dir, params)?;
88
89    let mut src: Box<dyn source::Source> = match config.source.source_type {
90        SourceType::Postgres => Box::new(source::postgres::PostgresSource::connect_with_tls(
91            &url, tls,
92        )?),
93        SourceType::Mysql => Box::new(source::mysql::MysqlSource::connect_with_tls(&url, tls)?),
94    };
95
96    let mappings = src.type_mappings(&query, column_overrides)?;
97    let mut violations = policy.validate(&mappings);
98
99    // Format-awareness: type resolution above is for the Parquet representation,
100    // but a CSV export rejects columns CSV can't serialize (lists, etc.) up front
101    // at writer creation. Surface those here so `check`/`--strict` agree with the
102    // run — otherwise a list column reports "safe" only for the CSV run to fail
103    // loud ("CSV cannot serialize column …"). Fatality follows the unsupported
104    // policy action (Fail under `--strict`, Warn otherwise).
105    if export.format == FormatType::Csv {
106        let fatal = policy.on_unsupported_type == PolicyAction::Fail;
107        for m in &mappings {
108            if let Some(dt) = m.arrow_type.as_ref()
109                && !crate::format::csv::csv_serializable(dt)
110            {
111                violations.push(PolicyViolation {
112                    column_name: m.column_name.clone(),
113                    fidelity: TypeFidelity::Unsupported,
114                    message: format!(
115                        "column '{}' (Arrow {dt:?}) cannot be serialized to CSV — \
116                         use `format: parquet` or drop it from the query",
117                        m.column_name
118                    ),
119                    fatal,
120                });
121            }
122        }
123    }
124
125    let mut target_failures = false;
126    let rows = mappings
127        .iter()
128        .map(|m| {
129            let (target_type, target_status, target_note, autoload_type, cast_sql) =
130                if let Some(tgt) = target {
131                    let spec = tgt.resolve_column(TargetInput::from(m));
132                    if spec.status == TargetStatus::Fail {
133                        target_failures = true;
134                    }
135                    // Surface the autoloaded type only when it diverges from the
136                    // native type — that divergence is the operator-facing point.
137                    let autoload =
138                        (spec.autoload_type != spec.target_type).then_some(spec.autoload_type);
139                    (
140                        Some(spec.target_type),
141                        Some(spec.status),
142                        spec.note,
143                        autoload,
144                        spec.cast_sql,
145                    )
146                } else {
147                    (None, None, None, None, None)
148                };
149            TypeReportRow {
150                column: m.column_name.clone(),
151                source_type: m.source_native_type.clone(),
152                rivet_type: rivet_type_label(&m.rivet_type),
153                arrow_type: m
154                    .arrow_type
155                    .as_ref()
156                    .map(|t| format!("{t:?}"))
157                    .unwrap_or_else(|| "-".into()),
158                fidelity: m.fidelity,
159                warnings: m.warnings.clone(),
160                target_type,
161                target_status,
162                target_note,
163                autoload_type,
164                cast_sql,
165            }
166        })
167        .collect();
168
169    // L5 recovery SQL (ADR-0014): a post-load transform for operators whose
170    // bare autoload would degrade types. `None` for DuckDB (faithful autoload)
171    // or when no target is set.
172    let recovery_sql =
173        target.and_then(|t| t.recovery_sql(&t.resolve_table(&mappings), &export.name));
174
175    Ok(ExportTypeReport {
176        export: export.name.clone(),
177        columns: rows,
178        violations,
179        target_failures,
180        recovery_sql,
181    })
182}
183
184/// Print the report as a human-readable table to stdout.
185pub fn print_table(report: &ExportTypeReport, target: Option<ExportTarget>) {
186    let col_w = col_width(&report.columns, |r| r.column.len());
187    let src_w = col_width(&report.columns, |r| r.source_type.len()).max("Source type".len());
188    let rv_w = col_width(&report.columns, |r| r.rivet_type.len()).max("Rivet type".len());
189    let arr_w = col_width(&report.columns, |r| r.arrow_type.len()).max("Arrow type".len());
190    let fid_w = "logical_string".len();
191
192    println!();
193    if let Some(tgt) = target {
194        println!("Export: {}  [target: {}]", report.export, tgt.label());
195    } else {
196        println!("Export: {}", report.export);
197    }
198
199    if target.is_some() {
200        let tgt_w = col_width(&report.columns, |r| {
201            r.target_type.as_deref().unwrap_or("-").len()
202        })
203        .max("Target type".len());
204        let sta_w = "Status".len();
205
206        println!(
207            "  {:<col_w$}  {:<src_w$}  {:<rv_w$}  {:<arr_w$}  {:<fid_w$}  {:<tgt_w$}  {:<sta_w$}",
208            "Column",
209            "Source type",
210            "Rivet type",
211            "Arrow type",
212            "Fidelity",
213            "Target type",
214            "Status"
215        );
216        println!(
217            "  {:-<col_w$}  {:-<src_w$}  {:-<rv_w$}  {:-<arr_w$}  {:-<fid_w$}  {:-<tgt_w$}  {:-<sta_w$}",
218            "", "", "", "", "", "", ""
219        );
220        for row in &report.columns {
221            let status_label = row.target_status.as_ref().map(|s| s.label()).unwrap_or("-");
222            let tgt_type = row.target_type.as_deref().unwrap_or("-");
223            let status_marker = match &row.target_status {
224                Some(TargetStatus::Fail) => " ✗",
225                Some(TargetStatus::Warn) => " ~",
226                _ => "",
227            };
228            println!(
229                "  {:<col_w$}  {:<src_w$}  {:<rv_w$}  {:<arr_w$}  {}{:<rest$}  {:<tgt_w$}  {}{}",
230                row.column,
231                row.source_type,
232                row.rivet_type,
233                row.arrow_type,
234                row.fidelity.label(),
235                "",
236                tgt_type,
237                status_label,
238                status_marker,
239                rest = fid_w - row.fidelity.label().len(),
240            );
241            if let Some(autoload) = &row.autoload_type {
242                println!("  {:<col_w$}    autoload: {}", "", autoload);
243            }
244            if let Some(note) = &row.target_note {
245                println!("  {:<col_w$}    note: {}", "", note);
246            }
247            if let Some(cast) = &row.cast_sql {
248                println!("  {:<col_w$}    recover: {}", "", cast);
249            }
250            for w in &row.warnings {
251                println!("  {:<col_w$}    warning: {}", "", w);
252            }
253        }
254    } else {
255        println!(
256            "  {:<col_w$}  {:<src_w$}  {:<rv_w$}  {:<arr_w$}  {:<fid_w$}",
257            "Column", "Source type", "Rivet type", "Arrow type", "Fidelity"
258        );
259        println!(
260            "  {:-<col_w$}  {:-<src_w$}  {:-<rv_w$}  {:-<arr_w$}  {:-<fid_w$}",
261            "", "", "", "", ""
262        );
263        for row in &report.columns {
264            println!(
265                "  {:<col_w$}  {:<src_w$}  {:<rv_w$}  {:<arr_w$}  {}{}",
266                row.column,
267                row.source_type,
268                row.rivet_type,
269                row.arrow_type,
270                row.fidelity.label(),
271                fidelity_marker(row.fidelity),
272            );
273            for w in &row.warnings {
274                println!("  {:<col_w$}    warning: {}", "", w);
275            }
276        }
277    }
278
279    if !report.violations.is_empty() {
280        println!();
281        for v in &report.violations {
282            let prefix = if v.fatal { "  FAIL" } else { "  WARN" };
283            println!("{}: {}", prefix, v.message);
284        }
285    }
286
287    if let Some(sql) = &report.recovery_sql {
288        println!();
289        println!(
290            "  {} type recovery — bare autoload degrades JSON/UUID→BYTES, naive",
291            target.map(|t| t.label()).unwrap_or("target")
292        );
293        println!("  timestamp→TIMESTAMP, array→RECORD; load with --autodetect then run:");
294        for line in sql.lines() {
295            println!("    {line}");
296        }
297    }
298}
299
300/// Emit newline-delimited JSON (one object per export).
301pub fn print_json(report: &ExportTypeReport) -> Result<()> {
302    let s = serde_json::to_string(report)?;
303    println!("{}", s);
304    Ok(())
305}
306
307fn col_width(rows: &[TypeReportRow], f: impl Fn(&TypeReportRow) -> usize) -> usize {
308    rows.iter().map(f).max().unwrap_or(8).max(8)
309}
310
311fn fidelity_marker(f: TypeFidelity) -> &'static str {
312    match f {
313        TypeFidelity::Lossy | TypeFidelity::Unsupported => " ✗",
314        TypeFidelity::LogicalString => " ~",
315        _ => "",
316    }
317}
318
319fn rivet_type_label(t: &crate::types::RivetType) -> String {
320    use crate::types::RivetType::*;
321    match t {
322        Bool => "bool".into(),
323        Int16 => "int2".into(),
324        Int32 => "int4".into(),
325        Int64 => "int8".into(),
326        UInt64 => "uint8".into(),
327        Float32 => "float4".into(),
328        Float64 => "float8".into(),
329        Decimal { precision, scale } => format!("decimal({precision},{scale})"),
330        Date => "date".into(),
331        Time { .. } => "time".into(),
332        Timestamp {
333            timezone: Some(_), ..
334        } => "timestamp_tz".into(),
335        Timestamp { timezone: None, .. } => "timestamp".into(),
336        String => "text".into(),
337        Text => "text".into(),
338        Binary => "binary".into(),
339        Json => "json".into(),
340        Uuid => "uuid".into(),
341        Enum => "enum".into(),
342        Interval => "interval".into(),
343        List { inner } => format!("list<{}>", rivet_type_label(inner)),
344        Unsupported { native_type, .. } => format!("unsupported({native_type})"),
345    }
346}
347
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{RivetType, TypeFidelity};

    /// Build a row with no warnings and no target data; only the four text
    /// cells vary between tests.
    fn bare_row(
        column: &str,
        source_type: &str,
        rivet_type: &str,
        arrow_type: &str,
    ) -> TypeReportRow {
        TypeReportRow {
            column: column.to_owned(),
            source_type: source_type.to_owned(),
            rivet_type: rivet_type.to_owned(),
            arrow_type: arrow_type.to_owned(),
            fidelity: TypeFidelity::Exact,
            warnings: Vec::new(),
            target_type: None,
            target_status: None,
            target_note: None,
            autoload_type: None,
            cast_sql: None,
        }
    }

    // ── fidelity_marker ──────────────────────────────────────────────────────

    #[test]
    fn fidelity_marker_lossy_is_cross() {
        let marker = fidelity_marker(TypeFidelity::Lossy);
        assert_eq!(marker, " ✗");
    }

    #[test]
    fn fidelity_marker_unsupported_is_cross() {
        let marker = fidelity_marker(TypeFidelity::Unsupported);
        assert_eq!(marker, " ✗");
    }

    #[test]
    fn fidelity_marker_logical_string_is_tilde() {
        let marker = fidelity_marker(TypeFidelity::LogicalString);
        assert_eq!(marker, " ~");
    }

    #[test]
    fn fidelity_marker_exact_is_empty() {
        let marker = fidelity_marker(TypeFidelity::Exact);
        assert_eq!(marker, "");
    }

    #[test]
    fn fidelity_marker_compatible_is_empty() {
        let marker = fidelity_marker(TypeFidelity::Compatible);
        assert_eq!(marker, "");
    }

    // ── rivet_type_label ─────────────────────────────────────────────────────

    #[test]
    fn label_bool() {
        let label = rivet_type_label(&RivetType::Bool);
        assert_eq!(label, "bool");
    }

    #[test]
    fn label_int64() {
        let label = rivet_type_label(&RivetType::Int64);
        assert_eq!(label, "int8");
    }

    #[test]
    fn label_float64() {
        let label = rivet_type_label(&RivetType::Float64);
        assert_eq!(label, "float8");
    }

    #[test]
    fn label_decimal_with_precision_and_scale() {
        let decimal = RivetType::Decimal {
            precision: 18,
            scale: 2,
        };
        assert_eq!(rivet_type_label(&decimal), "decimal(18,2)");
    }

    #[test]
    fn label_text() {
        let label = rivet_type_label(&RivetType::Text);
        assert_eq!(label, "text");
    }

    #[test]
    fn label_uuid() {
        let label = rivet_type_label(&RivetType::Uuid);
        assert_eq!(label, "uuid");
    }

    #[test]
    fn label_list_of_int64() {
        let inner = Box::new(RivetType::Int64);
        let list = RivetType::List { inner };
        assert_eq!(rivet_type_label(&list), "list<int8>");
    }

    #[test]
    fn label_unsupported_native_type() {
        let unsupported = RivetType::Unsupported {
            native_type: "tsvector".into(),
            reason: "not supported".into(),
        };
        assert_eq!(rivet_type_label(&unsupported), "unsupported(tsvector)");
    }

    // ── col_width ────────────────────────────────────────────────────────────

    #[test]
    fn col_width_empty_returns_minimum_8() {
        let no_rows: Vec<TypeReportRow> = Vec::new();
        assert_eq!(col_width(&no_rows, |_r| 0), 8);
    }

    #[test]
    fn col_width_short_values_returns_minimum_8() {
        let rows = [bare_row("a", "b", "c", "d")];
        assert_eq!(col_width(&rows, |r| r.column.len()), 8);
    }

    #[test]
    fn col_width_long_value_returns_that_length() {
        let name = "a_very_long_column_name";
        let rows = [bare_row(name, "int8", "int8", "Int64")];
        assert_eq!(col_width(&rows, |r| r.column.len()), name.len());
    }
}