# rivet-cli 0.9.2
#
# Rivet: PostgreSQL/MySQL/SQL Server → Parquet/CSV (local, S3, GCS, Azure). Crate name rivet-cli; binary rivet.
# Documentation
services:
  # Primary PostgreSQL 16 source database, published on host port 5432.
  # ./dev/postgres/init.sql is mounted into the image's init-script directory.
  postgres:
    image: postgres:16
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet
    ports:
      - "5432:5432"
    volumes:
      - ./dev/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # Probe over TCP instead of the default Unix socket. The official image
      # runs init scripts against a temporary server that only listens on the
      # socket, so a socket probe can report "healthy" while init.sql is still
      # being applied (and just before the server restarts). Dependents gated
      # on `service_healthy` would then connect too early.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # ---- Rivet state backend (RIVET_STATE_URL) -----------------------------------
  # Separate from the source `postgres` service so state is never mixed with
  # the data being extracted.  Connect with:
  #   RIVET_STATE_URL=postgresql://rivet:rivet@localhost:5433/rivet_state
  postgres-state:
    image: postgres:16
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet_state
    ports:
      # Host port 5433 so it can run next to the source `postgres` on 5432.
      - "5433:5432"
    volumes:
      # Named volume: state survives container re-creation.
      - postgres_state_data:/var/lib/postgresql/data
    healthcheck:
      # Probe over TCP instead of the default Unix socket. On first boot the
      # official image initialises the cluster with a temporary server that
      # only listens on the socket; a socket probe can therefore go healthy
      # before the real server is accepting client connections.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet_state"]
      interval: 5s
      timeout: 3s
      retries: 10

  # ---- Legacy Postgres versions (compatibility testing) ---------------------
  # Opt in via `docker compose --profile legacy up -d`. Each listens on a
  # non-default port so they coexist with the primary `postgres` service.
  # The same `init.sql` is used — it intentionally avoids features newer than
  # Postgres 11 so every supported version can run it.

  # PostgreSQL 12 compatibility target (profile `legacy`, host port 5412).
  postgres-12:
    image: postgres:12
    profiles: ["legacy"]
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet
    ports:
      - "5412:5432"
    volumes:
      - ./dev/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # TCP probe: the image's init-time temporary server listens only on the
      # Unix socket, so a socket probe can pass while init.sql is still running.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # PostgreSQL 13 compatibility target (profile `legacy`, host port 5413).
  postgres-13:
    image: postgres:13
    profiles: ["legacy"]
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet
    ports:
      - "5413:5432"
    volumes:
      - ./dev/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # TCP probe: the image's init-time temporary server listens only on the
      # Unix socket, so a socket probe can pass while init.sql is still running.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # PostgreSQL 14 compatibility target (profile `legacy`, host port 5414).
  postgres-14:
    image: postgres:14
    profiles: ["legacy"]
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet
    ports:
      - "5414:5432"
    volumes:
      - ./dev/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # TCP probe: the image's init-time temporary server listens only on the
      # Unix socket, so a socket probe can pass while init.sql is still running.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # PostgreSQL 15 compatibility target (profile `legacy`, host port 5415).
  postgres-15:
    image: postgres:15
    profiles: ["legacy"]
    environment:
      POSTGRES_USER: rivet
      POSTGRES_PASSWORD: rivet
      POSTGRES_DB: rivet
    ports:
      - "5415:5432"
    volumes:
      - ./dev/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # TCP probe: the image's init-time temporary server listens only on the
      # Unix socket, so a socket probe can pass while init.sql is still running.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U rivet -d rivet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # Primary MySQL 8.0 source database, published on host port 3306.
  mysql:
    image: mysql:8.0
    # `mysql_native_password` is required so the `pool` profile's ProxySQL
    # service can authenticate to MySQL as `rivet`. ProxySQL 2.5 talks to
    # backend MySQL over plain TCP, where MySQL 8's default
    # `caching_sha2_password` requires either TLS or an RSA key exchange that
    # the backend connection path does not perform. The rust `mysql` driver
    # used by direct tests handles both plugins, so this only affects auth
    # — query semantics, performance, and tuning are unchanged.
    command:
      - "--default-authentication-plugin=mysql_native_password"
    environment:
      MYSQL_ROOT_PASSWORD: rivet
      MYSQL_USER: rivet
      MYSQL_PASSWORD: rivet
      MYSQL_DATABASE: rivet
    ports:
      - "3306:3306"
    volumes:
      - ./dev/mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # Ping 127.0.0.1 (TCP), not `localhost` (Unix socket). While the official
      # image applies init scripts it runs a temporary server with networking
      # disabled; a socket ping succeeds against that server, so the container
      # would turn healthy before init.sql has finished and before the real
      # server is up. `proxysql` waits on this healthcheck.
      test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1", "-uroot", "-privet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # ---- SQL Server -----------------------------------------------------------
  # MSSQL source engine (tiberius driver). This image has no
  # /docker-entrypoint-initdb.d hook, so the `rivet` database + fixture are
  # seeded after the container is healthy via `dev/mssql/init.sql`
  # (`make seed-mssql` / the live-test harness pipes it through sqlcmd).
  mssql:
    image: mcr.microsoft.com/mssql/server:2022-latest
    ports:
      - "1433:1433"
    environment:
      ACCEPT_EULA: "Y"
      MSSQL_PID: Developer
      MSSQL_SA_PASSWORD: "Rivet_Passw0rd!"
    healthcheck:
      interval: 10s
      timeout: 5s
      retries: 20
      # sqlcmd lives under tools18 on the 2022 image. `-C` trusts the
      # self-signed server certificate generated on first boot. `$$` escapes
      # Compose interpolation so the variable is expanded inside the container.
      test:
        - CMD-SHELL
        - >-
          /opt/mssql-tools18/bin/sqlcmd -S localhost -U sa
          -P "$$MSSQL_SA_PASSWORD" -C -Q 'SELECT 1' || exit 1

  # ---- Legacy MySQL ---------------------------------------------------------
  # MySQL 5.7 is EOL (Oct 2023) but still widely deployed. Use mysql_native_password
  # as the default auth plugin so older clients (and the `mysql` Rust crate's
  # pre-caching-sha2 path) connect without tweaks. Listens on 3357.

  mysql-57:
    image: mysql:5.7
    profiles: ["legacy"]
    # MySQL 5.7 has no official arm64 image; run it under x86_64 emulation on
    # Apple Silicon hosts. Native amd64 hosts ignore this field.
    platform: linux/amd64
    command:
      - "--default-authentication-plugin=mysql_native_password"
      - "--max-allowed-packet=67108864"
    environment:
      MYSQL_ROOT_PASSWORD: rivet
      MYSQL_USER: rivet
      MYSQL_PASSWORD: rivet
      MYSQL_DATABASE: rivet
    ports:
      - "3357:3306"
    volumes:
      # Dedicated 5.7 init that skips MySQL 8-only features (window functions
      # in `orders_sparse_for_export`). See dev/mysql/init_57.sql.
      - ./dev/mysql/init_57.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # Ping 127.0.0.1 (TCP), not `localhost` (Unix socket). The image's
      # init-time temporary server has networking disabled, so a socket ping
      # would report healthy before init_57.sql has been applied.
      test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1", "-uroot", "-privet"]
      interval: 5s
      timeout: 3s
      retries: 10

  # MinIO object store: API on 9000, web console on 9001.
  minio:
    image: minio/minio:latest
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    command:
      - server
      - /data
      - --console-address
      - ":9001"

  # GCS JSON API emulator (OpenDAL uses /storage/v1/... under this base URL).
  fake-gcs:
    image: fsouza/fake-gcs-server:latest
    # Serve plain HTTP on all interfaces, port 4443.
    command: ["-scheme", "http", "-host", "0.0.0.0", "-port", "4443"]
    ports:
      - "4443:4443"

  # --- Monitoring stack (Postgres metrics under load) ---
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:latest
    depends_on:
      postgres:
        condition: service_started
    ports:
      - "9187:9187"
    environment:
      # Points at the primary `postgres` service over the Compose network.
      DATA_SOURCE_NAME: "postgresql://rivet:rivet@postgres:5432/rivet?sslmode=disable"

  prometheus:
    image: prom/prometheus:latest
    depends_on: [postgres-exporter]
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration, mounted read-only.
      - ./dev/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro

  grafana:
    image: grafana/grafana:latest
    depends_on: [prometheus]
    ports:
      - "3000:3000"
    environment:
      # Admin login plus anonymous read-only (Viewer) access.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
    volumes:
      # Provisioning config and dashboards, both mounted read-only.
      - ./dev/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./dev/grafana/dashboards:/var/lib/grafana/dashboards:ro

  # Toxiproxy — network fault injection proxy for retry/resilience testing
  # (the `toxiproxy` service itself is defined further below, after `proxysql`).
  # Postgres is proxied on port 15432; MySQL on port 13306.
  # ---- pgBouncer (pool safety tests) ----------------------------------------
  # Transaction-mode pooler with pool_size=1 so the same physical connection
  # is always reused — makes session-state leak tests deterministic.
  # Opt in: docker compose --profile pool up -d pgbouncer
  pgbouncer:
    # edoburu/pgbouncer is the working community image after bitnami yanked
    # `bitnami/pgbouncer:latest` (manifest unknown upstream as of 2026-05).
    image: edoburu/pgbouncer:latest
    profiles: ["pool"]
    depends_on:
      postgres:
        condition: service_healthy
    ports:
      - "6432:6432"
    environment:
      DATABASE_URL: "postgres://rivet:rivet@postgres:5432/rivet"
      AUTH_TYPE: plain
      LISTEN_PORT: "6432"
      POOL_MODE: transaction
      # A single server connection, so every client shares one backend session.
      DEFAULT_POOL_SIZE: "1"
      MAX_CLIENT_CONN: "50"
    healthcheck:
      interval: 5s
      timeout: 3s
      retries: 10
      # edoburu image does not bundle pg_isready; use a raw TCP connect.
      # NOTE(review): `/dev/tcp` is a bash feature and CMD-SHELL runs via
      # /bin/sh — confirm this image's /bin/sh supports it.
      test:
        - CMD-SHELL
        - "(echo > /dev/tcp/127.0.0.1/6432) >/dev/null 2>&1 || exit 1"

  # ---- ProxySQL (MySQL pool safety + multiplexing detection tests) ---------
  # Single-backend ProxySQL configured to forward to `mysql:3306`.  The
  # docker-entry config sets up:
  #   - one backend server (`mysql:3306`, hostgroup 0)
  #   - one mysql_users row for `rivet/rivet` mapped to hostgroup 0
  #   - admin user proxysql/admin on the admin interface (port 6032)
  # so a `mysql -h 127.0.0.1 -P 6033 -urivet -privet` session reaches MySQL.
  #
  # Opt in: `docker compose --profile pool up -d proxysql`
  # `depends_on` below makes Compose start the default-profile `mysql` service
  # and wait for it to be healthy before ProxySQL starts.
  proxysql:
    image: proxysql/proxysql:2.5.5
    profiles: ["pool"]
    ports:
      # 6033: client-facing MySQL protocol port; 6032: ProxySQL admin SQL.
      - "6033:6033"
      - "6032:6032"
    depends_on:
      mysql:
        condition: service_healthy
    volumes:
      - ./dev/proxysql/proxysql.cnf:/etc/proxysql.cnf:ro
    healthcheck:
      # Wait for the client-facing 6033 port to accept connections.  ProxySQL's
      # admin port (6032) comes up first; using 6033 ensures backend wiring is
      # in place before tests dial in.
      #
      # The probe is run through bash explicitly: `/dev/tcp/...` is a bash
      # redirection feature, while CMD-SHELL executes via /bin/sh, which on
      # many images is a POSIX shell (dash/ash) where the redirect always
      # fails and the container can never become healthy.
      # NOTE(review): assumes bash is installed in this image — confirm.
      test: ["CMD", "bash", "-c", "(echo > /dev/tcp/127.0.0.1/6033) >/dev/null 2>&1 || exit 1"]
      interval: 5s
      timeout: 3s
      retries: 20

  # Toxiproxy fault-injection proxy in front of the `postgres` and `mysql`
  # services.
  toxiproxy:
    image: ghcr.io/shopify/toxiproxy:latest
    depends_on:
      postgres:
        condition: service_started
      mysql:
        condition: service_started
    ports:
      # HTTP API
      - "8474:8474"
      # proxied Postgres
      - "15432:15432"
      # proxied MySQL
      - "13306:13306"

  # ---- DuckDB & ClickHouse — Parquet validators (ADR-0014) -----------------
  # These two services are kept running so live type-roundtrip tests can pipe
  # the *same* Parquet artifact through two independent readers and assert
  # round-trip on values + Parquet-level type semantics (decimals, JSON, UUID,
  # tz-aware timestamps, binary, lists). They are not production components —
  # they are oracles for the Rivet → Parquet layer.
  #
  # Shared bind mount `./tests/.live-tmp` is the handoff: Rivet tests write a
  # Parquet file there from the host, then `docker exec` reads it inside the
  # container at the same path under `/work`.
  duckdb:
    # Plain Python image that installs `duckdb` at start-up. Tests talk to it
    # via `docker exec rivet-duckdb python -c "..."`, so no port is published.
    image: python:3.12-slim
    container_name: rivet-duckdb
    init: true
    volumes:
      - ./tests/.live-tmp:/work
    entrypoint:
      - bash
      - -lc
      # Installed packages:
      #   * `pytz` — needed by the duckdb Python adapter to materialise
      #     TIMESTAMP WITH TIME ZONE values; without it every `SELECT *` over
      #     a Parquet file with a tz-aware timestamp column raises
      #     ModuleNotFoundError.
      #   * `pyarrow` — third independent Parquet reader used by tests
      #     (alongside duckdb and clickhouse); as the reference Arrow
      #     implementation it sees field metadata and statistics exactly as
      #     written.
      # After the install and an import smoke test, `tail -f /dev/null` keeps
      # the container alive for `docker exec`.
      - >-
        pip install --quiet --disable-pip-version-check
        duckdb==1.4.1 pytz==2024.1 pyarrow==18.1.0 &&
        python -c 'import duckdb, pytz, pyarrow; print("duckdb", duckdb.__version__, "pyarrow", pyarrow.__version__)' &&
        exec tail -f /dev/null
    healthcheck:
      interval: 5s
      timeout: 5s
      retries: 30
      start_period: 30s
      # Passes once `duckdb` imports and can run a query, i.e. pip has finished.
      test:
        - CMD
        - python
        - -c
        - "import duckdb; duckdb.sql('select 1')"

  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    container_name: rivet-clickhouse
    environment:
      CLICKHOUSE_DB: rivet
      CLICKHOUSE_USER: rivet
      CLICKHOUSE_PASSWORD: rivet
    # `ulimits` are required by the official image at runtime.
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    ports:
      # HTTP interface — the one the tests use.
      - "8123:8123"
      # Native protocol, remapped to host port 9002 because MinIO already
      # publishes 9000 (S3) and 9001 (console).
      - "9002:9000"
    volumes:
      # Lets the `file()` table function read Parquet from /work; by default
      # ClickHouse only trusts /var/lib/clickhouse/user_files.
      - ./dev/clickhouse/user_files.xml:/etc/clickhouse-server/config.d/user_files.xml:ro
      - ./tests/.live-tmp:/work
    healthcheck:
      interval: 5s
      timeout: 3s
      retries: 30
      start_period: 15s
      # Single-attempt HTTP probe of the /ping endpoint.
      test:
        - CMD
        - wget
        - --quiet
        - --tries=1
        - --spider
        - "http://127.0.0.1:8123/ping"

# Named volumes, default driver settings.
volumes:
  # Data directory of the `postgres-state` service.
  postgres_state_data: {}