# rivet-cli 0.16.1
#
# Rivet: PostgreSQL/MySQL/SQL Server → Parquet/CSV (local, S3, GCS, Azure).
# Crate name rivet-cli; binary rivet.
# Documentation
#
# Declarative bench matrix — the thin "WHAT to run" spec. The per-tool "HOW to
# run" lives in harness/sweep.py's runners (each tool's CLI is different, so we
# don't force them into one schema — same split as ingestr's scenarios.yaml +
# tool-specific overrides). See docs/bench/README.md.
#
# Point of the sweep: prove the memory gap is STRUCTURAL, not a small-data fluke.
# rivet sizes work to a byte budget → peak RSS stays ~flat as rows grow. A
# row-COUNT-batched tool (ingestr: 100k-row Arrow batches) is ALSO ~flat with row
# count — its peak is one batch — but ~20x higher, and that gap holds at every
# scale (measured: ~18-24x at 100k / 500k / 1M). The gap CLIMBS with row WIDTH, not
# count — a width sweep (narrow → wide fixtures) is the follow-up that shows ingestr
# actually diverging; the row-count sweep here shows the gap is constant + real.

# Database under test: the bench DB, reachable at
# postgresql://rivet:rivet@127.0.0.1:5432/rivet.
source: postgres

# Table to export: the wide 20-col fixture. It is sliced down to each scale
# via CTAS + LIMIT.
fixture: content_items

# Row-count sweep. A 10000000 entry can be added on a box with the disk for it
# (~34 GB of wide rows). The harness slices the existing fixture, so any scale
# above the seeded size is skipped with a logged note rather than silently
# truncated.
scales:
  - 100000
  - 500000
  - 1000000

# Measured runs per (scale, tool) pair; the MEDIAN is what gets reported.
runs: 3

# Run(s) executed first and discarded, so caches are warm and the cold start
# is not part of the measurement.
warmup: 1

# Tools to compare. rivet is always run; every other tool is optional and is
# skipped (with a note) when its binary isn't on PATH, so the harness still
# runs standalone.
tools:
  - rivet
  - ingestr