# otlp2parquet 0.9.1
#
# Stream OpenTelemetry logs, metrics, and traces to Parquet files
# Documentation
# otlp2parquet with MinIO - S3-compatible local storage
# One command: docker-compose up

services:
  # MinIO - S3-compatible object storage.
  # Container port 9000 is published as MINIO_API_PORT and 9001 (the console
  # address set in `command`) as MINIO_CONSOLE_PORT. Root credentials default
  # to minioadmin/minioadmin and can be overridden from the environment.
  minio:
    image: minio/minio:latest
    ports:
      - "${MINIO_API_PORT:-9000}:9000"
      - "${MINIO_CONSOLE_PORT:-9001}:9001"
    environment:
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
    volumes:
      # Named volume (declared at the bottom of the file) keeps bucket data
      # across container restarts.
      - minio-data:/data
    command: server /data --console-address ":9001"
    networks:
      otlp_net:
        aliases:
          - otlp.minio
    healthcheck:
      # Recent minio/minio images do not ship curl, so a curl-based probe
      # fails with "executable not found" and the service never becomes
      # healthy. Use the bundled MinIO client instead; `local` is mc's
      # built-in alias for http://localhost:9000.
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  # Create bucket automatically.
  # One-shot job: runs after MinIO reports healthy, registers an mc alias,
  # creates the bucket, enables anonymous download on it, then exits.
  # otlp2parquet waits on this service with `service_completed_successfully`,
  # so the script must exit non-zero when any step fails.
  minio-init:
    image: minio/mc:latest
    depends_on:
      minio:
        condition: service_healthy
    networks:
      otlp_net:
    environment:
      # Passed as container environment so the script can reference them as
      # quoted shell variables instead of having Compose splice raw values
      # into the command line.
      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
      S3_BUCKET: ${S3_BUCKET:-otlp}
    entrypoint:
      - /bin/sh
      - "-c"
      # `$$` is Compose's escape for a literal `$`, deferring expansion to the
      # shell inside the container. `set -e` aborts on the first failing
      # command so a failed bucket setup is not masked by the final echo.
      - |
        set -e
        /usr/bin/mc alias set myminio http://minio:9000 "$$MINIO_ROOT_USER" "$$MINIO_ROOT_PASSWORD"
        /usr/bin/mc mb "myminio/$$S3_BUCKET" --ignore-existing
        /usr/bin/mc anonymous set download "myminio/$$S3_BUCKET"
        echo 'MinIO bucket ready'

  # otlp2parquet
  # Built from the local Dockerfile; starts only after MinIO is healthy and
  # the bucket-creation job has completed successfully.
  otlp2parquet:
    # image: ghcr.io/smithclay/otlp2parquet:latest
    # For local development:
    build: .
    depends_on:
      minio:
        condition: service_healthy
      minio-init:
        condition: service_completed_successfully
    networks:
      otlp_net:
    ports:
      - "${HTTP_PORT:-4318}:4318"
    environment:
      # Storage. Defaults point at the bundled MinIO service; to use another
      # S3-compatible store, set S3_ENDPOINT (plus S3_BUCKET / S3_REGION as
      # needed) in the environment that runs docker-compose.
      OTLP2PARQUET_STORAGE_BACKEND: ${STORAGE_BACKEND:-s3}
      OTLP2PARQUET_S3_BUCKET: ${S3_BUCKET:-otlp}
      OTLP2PARQUET_S3_REGION: ${S3_REGION:-us-east-1}
      OTLP2PARQUET_S3_ENDPOINT: ${S3_ENDPOINT:-http://minio:9000}

      # Credentials for S3-compatible storage (defaults match MinIO's
      # default root user/password above)
      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-minioadmin}
      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-minioadmin}

      # Server (OTLP HTTP port). Quoted so the values stay strings rather
      # than being type-inferred by the YAML parser.
      OTLP2PARQUET_HTTP_PORT: "4318"
      OTLP2PARQUET_HTTP_HOST: "0.0.0.0"
      RUST_LOG: ${RUST_LOG:-debug}

      # Batching (disable for tests to get immediate writes)
      OTLP2PARQUET_BATCHING_ENABLED: ${BATCHING_ENABLED:-false}

      # Prefix for storage paths (used for test isolation); empty by default
      OTLP2PARQUET_PREFIX: ${OTLP2PARQUET_PREFIX:-}

    restart: unless-stopped
    # Note: healthcheck removed because distroless image has no wget/curl
    # The test-e2e.sh script handles health checking from the host

  # Telemetrygen - Generate sample OTLP logs, metrics, and traces
  # This instance emits traces to otlp2parquet over OTLP/HTTP.
  telemetrygen-traces:
    image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
    depends_on:
      otlp2parquet:
        condition: service_started
    networks:
      - otlp_net
    command:
      - "traces"
      - "--otlp-endpoint=otlp2parquet:4318"
      - "--otlp-http"
      - "--otlp-insecure"
      # Run continuously
      - "--duration=inf"
      # 2 spans per second
      - "--rate=2"
      - "--service=telemetrygen-traces"
      - "--status-code=Ok"
    restart: unless-stopped

  # Telemetrygen instance that emits metrics to otlp2parquet over OTLP/HTTP.
  telemetrygen-metrics:
    image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
    networks:
      - otlp_net
    depends_on:
      otlp2parquet:
        condition: service_started
    command:
      - "metrics"
      - "--otlp-endpoint=otlp2parquet:4318"
      - "--otlp-http"
      - "--otlp-insecure"
      # Run continuously
      - "--duration=inf"
      # 2 metrics per second
      - "--rate=2"
      - "--service=telemetrygen-metrics"
    restart: unless-stopped

  # Telemetrygen instance that emits logs to otlp2parquet over OTLP/HTTP.
  telemetrygen-logs:
    image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
    networks:
      - otlp_net
    depends_on:
      otlp2parquet:
        condition: service_started
    command:
      - "logs"
      - "--otlp-endpoint=otlp2parquet:4318"
      - "--otlp-http"
      - "--otlp-insecure"
      # Run continuously
      - "--duration=inf"
      # 2 logs per second
      - "--rate=2"
      - "--service=telemetrygen-logs"
    restart: unless-stopped

# Named volume mounted at /data in the minio service (default driver).
volumes:
  minio-data: {}

# Network that every service in this file attaches to (default driver).
networks:
  otlp_net: {}
# Usage:
#
# Local (MinIO + plain Parquet):
#   docker-compose up
#   OTLP endpoint: http://localhost:4318/v1/logs (OpenTelemetry HTTP)
#   MinIO console: http://localhost:9001 (minioadmin/minioadmin)
#
# S3-compatible storage:
#   STORAGE_BACKEND=s3 S3_ENDPOINT=https://object.example.com AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=xxx docker-compose up
#
# Query with DuckDB:
#   duckdb -c "INSTALL httpfs; LOAD httpfs;
#   SET s3_endpoint='localhost:9000'; SET s3_url_style='path'; SET s3_use_ssl=false;
#   SET s3_access_key_id='minioadmin'; SET s3_secret_access_key='minioadmin';
#   SELECT * FROM read_parquet('s3://otlp/otel/otel_logs/data/*.parquet') LIMIT 10;"