1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Fly.io Configuration for AllSource Core
# Deploy from monorepo root (Core Dockerfile needs workspace context):
# flyctl deploy --config apps/core/fly.toml --dockerfile apps/core/Dockerfile
# Scale: flyctl scale count 2 --app allsource-core
# Fly app name (matches the --app value used in the scale command above)
# and the region new machines are placed in by default.
app = "allsource-core"
primary_region = "iad" # US East (Virginia) - change to your preferred region
# Image build settings. The deploy command in the file header also passes
# --dockerfile explicitly.
[build]
dockerfile = "Dockerfile"
# NOTE(review): this table header has lost its name and every assignment
# below has lost its key, so the file does not parse as TOML as written.
# The values are all strings and the comments describe runtime tuning knobs
# (ALLSOURCE_CACHE_BYTES is named in the VM sizing comment further down), so
# this is presumably the [env] table — restore the variable names from
# version control before deploying.
[]
# Log filter directive: info level for allsource_core and tower_http.
# Presumably the RUST_LOG variable — TODO confirm.
= "allsource_core=info,tower_http=info"
# "::" is the IPv6 unspecified address; presumably the bind host — TODO confirm.
= "::"
# PORT is automatically set by Fly.io
# Sustainable data strategy (Steps 1-6, landed 2026-04-27 — see
# docs/proposals/SUSTAINABLE_DATA_STRATEGY.md). The cache budget is
# the headline fix for issue #160 (2026-04-25 OOM during WAL replay):
# resident set is now bounded regardless of dataset size. 2 GiB on
# the 4 GiB VM leaves headroom for the Rust runtime, replication
# buffers, and the Parquet flush staging area.
# Value is in bytes: 2147483648 = 2 GiB. Presumably ALLSOURCE_CACHE_BYTES.
= "2147483648"
# Step 6: checkpoint cadence (Parquet flush + WAL truncate). Bounds
# cold-restart replay to ~one minute of writes regardless of how
# large the dataset has grown.
# Presumably expressed in seconds — TODO confirm against the variable name.
= "60"
# Step 4: per-tenant compaction cadence (default 3600s / hourly is
# also set by code; explicit here so ops can grep fly.toml).
= "3600"
# Step 5: 30-day retention for the heartbeat-churning system tenant.
# Other tenants keep events forever by default.
= "30"
# One-shot flat→tenant Parquet migration (Step 1 / commit bdd3cd8).
# The migrator is idempotent: once the storage tree has no flat files
# at the root, every subsequent boot's pre-Core check is a fast
# directory listing. Safe to leave set permanently — see
# apps/core/docker-entrypoint.sh and
# docs/migrations/STORAGE_LAYOUT_FLAT_TO_TENANT.md.
= "true"
# Core is internal-only (bead t-0ff8). Public ingress removed 2026-04-17.
# All user-facing traffic enters through Control Plane's delegation layer.
# Internal .fly network (http://allsource-core.internal:3900) still works
# because other Fly apps in the same org reach machine IPs directly — no
# [http_service] / [[services]] block is needed. See
# docs/deployment/API_SUBDOMAIN_SETUP.md for the full topology.
#
# Rollback: restore [http_service] and re-allocate a public IPv6 with
# `fly ips allocate-v6 -a allsource-core`.
# Top-level machine orchestration (was nested under [http_service])
# NOTE(review): no table header separates these keys from the table opened
# earlier in the file, so TOML assigns them to that table rather than to the
# document root. Fly documents these settings under [http_service] /
# [[services]]; confirm they take effect in this position, or move them
# above the first table header if root-level placement is intended.
auto_stop_machines = true
auto_start_machines = true
# Keep at least one machine running so the internal endpoint stays reachable.
min_machines_running = 1
# Internal HTTP health check so auto_stop/auto_start don't kill a healthy
# machine. Runs inside the Fly network; does not expose anything publicly.
# NOTE(review): the table name and every key in this table are missing, so
# these lines do not parse as TOML as written. Judging by the values and the
# comment above, this is presumably a Fly health-check table (for example
# [checks.<name>]) with fields along the lines of grace period, interval,
# HTTP method, path, port, protocol, timeout and type — TODO confirm and
# restore the names from version control.
[]
# Two durations: "10s" then "30s" (presumably grace period and interval).
= "10s"
= "30s"
# HTTP GET against /health on port 3900, the same port as the internal
# address documented above.
= "GET"
= "/health"
= 3900
= "http"
# Presumably the per-probe timeout.
= "5s"
= "http"
# Machine sizing for this app.
[[vm]]
# shared-cpu-2x to unlock 4 GiB (shared-cpu-1x caps at 2 GiB).
cpu_kind = "shared"
cpus = 2
# 4 GiB host with a 2 GiB ALLSOURCE_CACHE_BYTES budget (above). The
# cache budget — not the VM size — is the load-bearing OOM control
# post-Step-3. The remaining ~2 GiB is headroom for the Rust runtime,
# replication buffers, and the Parquet flush staging area. The VM can
# be downsized once the budget runs at steady state with margin to
# spare; don't shrink it in the same change as flipping the budget on.
memory_mb = 4096
# Fly volume "allsource_data" mounted at the application's data directory.
[mounts]
source = "allsource_data"
destination = "/app/data"