# allsource-core 0.20.0
#
# High-performance event store core built in Rust
# Documentation
# Fly.io Configuration for AllSource Core
# Deploy from monorepo root (Core Dockerfile needs workspace context):
#   flyctl deploy --config apps/core/fly.toml --dockerfile apps/core/Dockerfile
# Scale: flyctl scale count 2 --app allsource-core

app = "allsource-core"
primary_region = "iad"  # US East (Virginia) - change to your preferred region

# Core is internal-only (bead t-0ff8). Public ingress removed 2026-04-17.
# All user-facing traffic enters through Control Plane's delegation layer.
# Internal .fly network (http://allsource-core.internal:3900) still works
# because other Fly apps in the same org reach machine IPs directly — no
# [http_service] / [[services]] block is needed. See
# docs/deployment/API_SUBDOMAIN_SETUP.md for the full topology.
#
# Rollback: restore [http_service] and re-allocate a public IPv6 with
# `fly ips allocate-v6 -a allsource-core`.

# Machine orchestration (was nested under [http_service]). These keys must
# stay ABOVE the first table header: TOML assigns every key that follows a
# header to that table, so when they were listed after [env] they parsed as
# env.auto_stop_machines / env.auto_start_machines / env.min_machines_running
# rather than as root-level settings.
# NOTE(review): Fly's fly.toml reference documents these three settings under
# [http_service] / [[services]] — confirm flyctl honors them at the root when
# no service block is present.
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 1

[build]
  dockerfile = "Dockerfile"

# Only environment variables belong in this table; every key up to the next
# table header is exported to the process environment.
[env]
  RUST_LOG = "allsource_core=info,tower_http=info"
  ALLSOURCE_HOST = "::"
  # PORT is automatically set by Fly.io

  # Sustainable data strategy (Steps 1-6, landed 2026-04-27 — see
  # docs/proposals/SUSTAINABLE_DATA_STRATEGY.md). The cache budget is
  # the headline fix for issue #160 (2026-04-25 OOM during WAL replay):
  # resident set is now bounded regardless of dataset size. 2 GiB on
  # the 4 GiB VM leaves headroom for the Rust runtime, replication
  # buffers, and the Parquet flush staging area.
  ALLSOURCE_CACHE_BYTES = "2147483648"
  # Step 6: checkpoint cadence (Parquet flush + WAL truncate). Bounds
  # cold-restart replay to ~one minute of writes regardless of how
  # large the dataset has grown.
  ALLSOURCE_CHECKPOINT_INTERVAL_SECONDS = "60"
  # Step 4: per-tenant compaction cadence (default 3600s / hourly is
  # also set by code; explicit here so ops can grep fly.toml).
  ALLSOURCE_SNAPSHOT_INTERVAL_SECONDS = "3600"
  # Step 5: 30-day retention for the heartbeat-churning system tenant.
  # Other tenants keep events forever by default.
  ALLSOURCE_RETENTION_SYSTEM_DAYS = "30"

  # One-shot flat→tenant Parquet migration (Step 1 / commit bdd3cd8).
  # The migrator is idempotent: once the storage tree has no flat files
  # at the root, every subsequent boot's pre-Core check is a fast
  # directory listing. Safe to leave set permanently — see
  # apps/core/docker-entrypoint.sh and
  # docs/migrations/STORAGE_LAYOUT_FLAT_TO_TENANT.md.
  ALLSOURCE_MIGRATE_FLAT_TO_TENANT = "true"

# Internal HTTP health check so auto_stop/auto_start don't kill a healthy
# machine. Runs inside the Fly network; does not expose anything publicly.
[checks.health]
  # What to probe: a plain-HTTP GET of /health on port 3900.
  type = "http"
  protocol = "http"
  port = 3900
  method = "GET"
  path = "/health"
  # When to probe: wait 10s after start, then every 30s, giving each
  # request up to 5s to answer.
  grace_period = "10s"
  interval = "30s"
  timeout = "5s"

[[vm]]
  # 4 GiB of RAM, paired with the 2 GiB ALLSOURCE_CACHE_BYTES budget set
  # in [env]. Post-Step-3 it is the cache budget, not the VM size, that
  # guards against OOM; the other ~2 GiB covers the Rust runtime,
  # replication buffers, and the Parquet flush staging area. Downsizing is
  # fine once the budget has run at steady state with margin to spare —
  # just not in the same change that turns the budget on.
  memory_mb = 4096
  # Two shared CPUs (shared-cpu-2x): needed to reach 4 GiB, because
  # shared-cpu-1x tops out at 2 GiB.
  cpus = 2
  cpu_kind = "shared"

[mounts]
  # Volume "allsource_data" is attached at /app/data inside the machine.
  # NOTE(review): presumably this is where the WAL / Parquet data lives —
  # confirm against the Core storage configuration.
  destination = "/app/data"
  source = "allsource_data"