# otlp2parquet 0.9.0
#
# Stream OpenTelemetry logs, metrics, and traces to Parquet files
# Documentation
# otlp2parquet Configuration File Example
# ==========================================
# Copy this file to config.toml or .otlp2parquet.toml and customize for your environment
# Alternatively, use environment variables with the OTLP2PARQUET_ prefix
#
# Configuration Priority (highest to lowest):
#   1. Environment variables (OTLP2PARQUET_*)
#   2. This TOML file (config.toml or .otlp2parquet.toml)
#   3. Defaults (server mode)
#
# Quick Start:
#   1. Copy this file: cp config.example.toml config.toml
#   2. Uncomment and customize sections for your environment
#   3. Start server: cargo run

# ==============================================================================
# Batch Configuration
# ==============================================================================
# Controls in-memory batching to create optimal Parquet file sizes.
# Larger batches = fewer files = better query performance & lower storage costs.
[batch]
# Row-count flush trigger: maximum number of rows per batch before the batch
# is flushed to storage.
# Recommended: 10,000 - 1,000,000 rows depending on schema size
max_rows = 200_000

# Size flush trigger: maximum bytes per batch before flushing (128 MB default).
# The value below is exactly 128 * 1024 * 1024 bytes, i.e. 128 MiB.
# Recommended: 64 MB - 512 MB depending on available memory
max_bytes = 134_217_728  # 128 MiB

# Time flush trigger: maximum age of a batch, in seconds, before flushing.
# Recommended: 5-60 seconds depending on latency requirements
max_age_secs = 10

# Master switch for batching.
#   true  = batch data in memory before writing (recommended for server)
#   false = write immediately per request
# NOTE(review): presumably the three limits above are ignored when this is
# false - confirm against the implementation.
enabled = true


# ==============================================================================
# Request Handling Configuration
# ==============================================================================
# Controls HTTP request validation and size limits
[request]
# Maximum HTTP payload size, in bytes.
# The value below is exactly 8 * 1024 * 1024 bytes, i.e. 8 MiB.
# Recommendation: set based on available memory and expected batch sizes
max_payload_bytes = 8_388_608  # 8 MiB


# ==============================================================================
# Storage Configuration
# ==============================================================================
# Configures where Parquet files are written
#
# Supported backends:
#   - "fs": Local filesystem (development, testing)
#   - "s3": S3-compatible storage (production)
[storage]
# Storage backend type
# Options: "fs" | "s3"
# Backend-specific settings live in the matching sub-table:
# [storage.fs] for "fs", [storage.s3] for "s3".
backend = "fs"

# Parquet row group size (advanced tuning)
# Recommended: 32,768 - 1,048,576 rows per group
# NOTE(review): no key follows this comment, so this example does not set a
# row group size. The setting's key name is not shown in this file - confirm
# it against the project documentation before adding one.

# --- Filesystem Storage (backend="fs") ---
[storage.fs]
# Local directory under which Parquet files are stored.
# Files are laid out beneath this path using Hive-style partitioning:
#   logs/{service}/year={year}/month={month}/day={day}/hour={hour}/file.parquet
# Example: ./data/logs/my-service/year=2025/month=01/day=15/hour=10/abc123.parquet
# NOTE(review): a relative path such as "./data" is presumably resolved
# against the process's working directory - confirm.
path = "./data"

# --- S3 Storage (backend="s3") ---
# Supports: MinIO, LocalStack, and any other S3-compatible storage
# To enable: set backend = "s3" in [storage] and uncomment the table below
# [storage.s3]
# # Required: S3 bucket name
# bucket = "my-otlp-bucket"
#
# # Required: Storage region
# region = "us-east-1"
#
# # Optional: Custom S3 endpoint for S3-compatible services
# # Examples:
# #   - MinIO: "http://localhost:9000"
# #   - LocalStack: "http://localhost:4566"
# # endpoint = "http://localhost:9000"
#
# # Credentials: Auto-discovered from environment


# ==============================================================================
# Server-Specific Configuration
# ==============================================================================
# These settings apply when running as a long-lived HTTP server
[server]
# HTTP server listen address
# Format: "host:port"
# Default port: 4318 (OTLP HTTP standard port)
# The host "0.0.0.0" binds all IPv4 interfaces; use "127.0.0.1:4318" to
# accept connections from the local machine only.
listen_addr = "0.0.0.0:4318"

# Log level: controls verbosity of application logs.
# Options, from most to least verbose:
#   "trace" | "debug" | "info" | "warn" | "error"
log_level = "info"

# Log format: output format for application logs.
# Options: "text" | "json"
log_format = "text"