greentic-start-dev 1.1.26647709683

Greentic lifecycle runner for start/restart/stop orchestration
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
//! Slim HTTP serving loop for the no-bundle, runtime-config-activated revision
//! path (the B3 → execution-bridge step).
//!
//! When `greentic start` runs without `--bundle`/`--config`, the runtime-config
//! activation ([`crate::revision_boot`]) loads every pinned revision into an
//! embedded [`RunnerHost`] and produces a [`RevisionIngressRouting`] bundle. The
//! legacy [`crate::http_ingress`] server cannot serve that: it is hard-wired to a
//! single `DemoRunnerHost` rooted at a bundle directory, neither of which exists
//! here. This module is the dedicated, minimal serving surface for the new path.
//!
//! Per request it: resolves the bound deployment + tenant from `(host, path)`,
//! asks the dispatcher to pick a revision (honouring stickiness cookies), maps
//! the request body to a canonical [`Activity`], runs it against that revision's
//! runtime via [`RunnerHost::handle_activity_for_revision`], and serializes the
//! reply activities back as a JSON array.
//!
//! This is the **generic-JSON vertical slice**: the body is treated as a generic
//! JSON activity (a `text` field becomes a messaging activity, anything else a
//! custom `http.request` activity routed to the pack's entry flow). Provider
//! webhook parsing (Slack/Telegram signature-verified `ingest_http`), WebChat /
//! DirectLine, WebSocket upgrades, and static-asset serving under revisions are
//! deliberately out of scope and stay on the legacy ingress for now.
//!
//! Because provider parsing is deferred, the slice is **fail-closed** rather than
//! a catch-all: a request whose `(path, method)` matches the selected revision's
//! declared provider route is refused (`501`) instead of being run generically —
//! that would skip the provider's signature/token verification. Only `POST`
//! requests to non-provider paths run the entry flow; everything else is `404`
//! (no deployment bound) / `405` (wrong method) / `501` (provider path). Caller-
//! asserted identity (`x-greentic-user`/`-session`, body `user`/`session`) is
//! honoured only from loopback peers, so a remote caller cannot impersonate a
//! user/session or pin a chosen revision (see `caller_identity`).

use std::collections::HashMap;
use std::convert::Infallible;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;
use std::sync::mpsc;
use std::thread::{self, JoinHandle};
use std::time::Duration;

use anyhow::{Context, Result};
use arc_swap::ArcSwap;
use greentic_deploy_spec::ids::{DeploymentId, RevisionId};
use http_body_util::{BodyExt, Full, Limited};
use hyper::body::{Bytes, Incoming};
use hyper::server::conn::http1::Builder as Http1Builder;
use hyper::service::service_fn;
use hyper::{Request, Response, StatusCode, header};
use hyper_util::rt::tokio::TokioIo;
use serde_json::Value;
use tokio::net::TcpListener;
use tokio::runtime::{Handle, Runtime};
use tokio::sync::oneshot;

use greentic_runner_host::{Activity, RunnerHost};

use greentic_deploy_spec::{DEFAULT_LISTEN_ADDR, EnvironmentHostConfig};

use crate::deployment_routes::RevisionIngressRouting;
use crate::http_routes::{HttpRouteTable, RevisionScope};
use crate::operator_log;
use crate::revision_dispatcher::{
    DispatchRequest, RevisionDispatcher, RevisionKey, SetCookieDirective, cookie_name,
};
use crate::revision_drain::{
    DrainRequest, NoopRevisionTeardown, RevisionDrainCoordinator, RevisionLivenessProbe,
    RevisionTeardown,
};

/// Upper bound, in bytes, on a request body accepted by the revision ingress.
/// The cap applies even on the loopback / local posture: without it a single
/// oversized POST could exhaust memory before the JSON parse gets a chance to
/// reject it.
const MAX_BODY_BYTES: usize = 1024 * 1024; // 1 MiB

/// Activated host + routing as a single coherent unit. Requests bind to one
/// `Arc<Activation>` at the top of [`serve`] and use the same `host` and
/// `routing` for the rest of their lifetime — so a [`RevisionServer::reload`]
/// that swaps the slot mid-request cannot tear (dispatch via the new
/// dispatcher, execute against the old host, or vice versa).
pub(crate) struct Activation {
    /// Runner host that belongs to this activation; always used together
    /// with the `routing` stored next to it.
    pub host: Arc<RunnerHost>,
    /// Routing bundle that belongs to this activation. This module reads its
    /// `dispatcher` (revision selection, counts, generations) and its
    /// `deployment_routes` (deployment → tenant lookup).
    pub routing: Arc<RevisionIngressRouting>,
}

/// Inputs for [`RevisionServer::start`]: where to listen plus the initial
/// activation the server serves over. Reload swaps in a new [`Activation`] via
/// [`RevisionServer::reload`].
pub(crate) struct RevisionServeConfig {
    /// Requested listen address. The IP is used as-is; the port is only a
    /// starting point — [`RevisionServer::start`] moves to another free port
    /// when the requested one is taken.
    pub bind_addr: SocketAddr,
    /// Activation installed in the server's slot at startup, i.e. what is
    /// served until the first reload.
    pub activation: Arc<Activation>,
}

/// State shared by the listener and every connection it accepts (one
/// `Arc<ServeState>` is cloned per connection). Holds the live activation
/// behind an [`ArcSwap`] so the producer (file-watcher / HTTP signal) can
/// hot-attach new revisions without restarting the listener. Each request
/// reads `slot` once at the top of [`serve`] and threads that snapshot through
/// dispatch + execute. The env id is read from
/// `activation.routing.dispatcher.env_id()` — not stored twice.
struct ServeState {
    /// The live activation. Replaced wholesale on reload; readers take a
    /// snapshot via [`ServeState::current`].
    slot: ArcSwap<Activation>,
    /// Address the listener bound to (after the `find_available_port` bump).
    /// Reported by `/status` so operators see the actual interface + port
    /// rather than what the user requested.
    bound_addr: SocketAddr,
}

impl ServeState {
    /// Snapshot the live activation.
    ///
    /// Returns an owned `Arc`, so the caller's snapshot stays valid across
    /// `.await` points even if a concurrent reload swaps the slot: the old
    /// activation cannot be dropped while any request still holds the `Arc`
    /// it pinned here. The reload's drain window additionally keeps the old
    /// activation alive for a while after the swap.
    fn current(&self) -> Arc<Activation> {
        // `load_full` hands back a full `Arc` rather than a short-lived
        // guard, which is what lets callers hold it across awaits.
        self.slot.load_full()
    }
}

/// Compute the revisions that were dropped between two dispatchers: every
/// [`RevisionKey`] known to `prev` whose `(deployment, revision)` pair is no
/// longer present in `next`. Keys are returned in the order `prev` reports
/// them.
///
/// [`RevisionServer::reload`] uses the result to start one drain per removed
/// revision against the OLD activation.
fn removed_revisions(prev: &RevisionDispatcher, next: &RevisionDispatcher) -> Vec<RevisionKey> {
    let mut gone = Vec::new();
    for key in prev.revision_keys() {
        // A key is (deployment id, bundle id, revision id); presence in
        // `next` is decided on deployment + revision only.
        let still_served = next.contains_revision(key.0, key.2);
        if !still_served {
            gone.push(key);
        }
    }
    gone
}

/// Liveness probe handed to each drain coordinator so it can suppress a
/// stale `RevisionEvicted` event when the revision it's draining is rolled
/// back / re-added into a newer activation before the drain window elapses.
///
/// Checks the server's live activation slot, not the OLD activation being
/// drained: if the revision reappears in whatever the server is currently
/// serving (a strictly newer activation than `draining_dispatcher`), the
/// eviction is stale and must not be reported.
struct SlotLivenessProbe {
    /// Shared server state; the probe reads the live activation from its
    /// slot on every query rather than caching a snapshot.
    state: Arc<ServeState>,
    /// The dispatcher this coordinator is draining. Identity guard: if the
    /// live slot still points at it, the revision is NOT live "elsewhere" —
    /// it's the same routing table, so the eviction event should fire
    /// (matches a direct drain of the live dispatcher).
    draining_dispatcher: Arc<RevisionDispatcher>,
}

impl RevisionLivenessProbe for SlotLivenessProbe {
    /// Returns `true` only when the revision is served by an activation
    /// other than the one being drained.
    fn is_live_elsewhere(&self, deployment_id: DeploymentId, revision_id: RevisionId) -> bool {
        let snapshot = self.state.current();
        let live_dispatcher = &snapshot.routing.dispatcher;
        // Pointer identity tells us whether the live slot still holds the
        // very dispatcher we are draining. Every reload publishes a
        // freshly-built dispatcher `Arc`, so a match means there is no newer
        // activation in which the revision could have reappeared.
        let draining_is_live = Arc::ptr_eq(live_dispatcher, &self.draining_dispatcher);
        // Short-circuit: the lookup only runs against a different dispatcher.
        !draining_is_live && live_dispatcher.contains_revision(deployment_id, revision_id)
    }
}

/// Start one [`RevisionDrainCoordinator::run`] task on `runtime_handle` for
/// every revision in `removed`, draining it against `prev`'s dispatcher.
///
/// Each task holds its own `Arc` to the OLD dispatcher, so that dispatcher
/// stays alive for as long as the drain runs regardless of when the caller
/// releases `prev`. WebSocket close and teardown are both no-ops here (the
/// coordinator is built with `with_noop_ws` and a [`NoopRevisionTeardown`]) —
/// see the [`crate::revision_drain`] module docs for the follow-up.
///
/// Every task also gets a [`SlotLivenessProbe`] over `state`, so a revision
/// that is rolled back into a newer activation within the drain window does
/// not produce a stale `RevisionEvicted` event.
///
/// A removed revision whose deployment has no tenant binding in `prev`'s
/// route table is logged and skipped. Coordinator errors are logged, never
/// propagated; the spawned tasks are detached.
fn spawn_revision_drains(
    runtime_handle: &Handle,
    state: Arc<ServeState>,
    prev: Arc<Activation>,
    removed: Vec<RevisionKey>,
    drain_window: Duration,
) {
    // The coordinator takes whole seconds; saturate instead of wrapping if
    // the window does not fit in a `u32`.
    let drain_seconds = u32::try_from(drain_window.as_secs()).unwrap_or(u32::MAX);
    let shared_teardown: Arc<dyn RevisionTeardown> = Arc::new(NoopRevisionTeardown);
    let old_routing = &prev.routing;

    for (deployment_id, bundle_id, revision_id) in removed {
        let tenant = match old_routing.deployment_routes.tenant_for(deployment_id) {
            Some(name) => name.to_string(),
            None => {
                // Dispatcher and route table are expected to describe the
                // same deployments, so a missing tenant is a structural
                // inconsistency. Report it and skip: a drain without a
                // tenant would feed tenantless telemetry downstream.
                operator_log::warn(
                    module_path!(),
                    format!(
                        "skipping drain for revision {revision_id} of deployment \
                         {deployment_id}: no tenant binding found in OLD activation \
                         route table (deployment likely removed before reload diff)"
                    ),
                );
                continue;
            }
        };

        let old_dispatcher = Arc::clone(&old_routing.dispatcher);
        let task_teardown = Arc::clone(&shared_teardown);
        let probe: Arc<dyn RevisionLivenessProbe> = Arc::new(SlotLivenessProbe {
            state: Arc::clone(&state),
            draining_dispatcher: Arc::clone(&old_dispatcher),
        });

        runtime_handle.spawn(async move {
            let coordinator = RevisionDrainCoordinator::with_noop_ws(old_dispatcher, task_teardown)
                .with_liveness_probe(probe);
            let request = DrainRequest {
                tenant: tenant.as_str(),
                deployment_id,
                bundle_id,
                revision_id,
                drain_seconds,
            };
            let outcome = coordinator.run(request).await;
            if let Err(err) = outcome {
                operator_log::warn(
                    module_path!(),
                    format!(
                        "drain coordinator for revision {revision_id} of \
                         deployment {deployment_id} returned an error: {err}"
                    ),
                );
            }
        });
    }
}

/// What [`RevisionServer::reload`] returns so the producer can log / emit
/// telemetry describing the transition without re-reading the dispatcher.
///
/// NOTE(review): the field meanings below are read off the field names; the
/// code that fills them is in `reload` — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct ReloadReport {
    /// Deployment count of the activation that was replaced (presumably).
    pub prev_deployments: usize,
    /// Revision count of the activation that was replaced (presumably).
    pub prev_revisions: usize,
    /// Deployment count of the activation that was swapped in (presumably).
    pub new_deployments: usize,
    /// Revision count of the activation that was swapped in (presumably).
    pub new_revisions: usize,
}

/// A running revision ingress server on its own thread + Tokio runtime, mirroring
/// the legacy ingress's lifecycle so `run_start` can `stop()` it on shutdown.
pub(crate) struct RevisionServer {
    /// Sending half of the shutdown signal. The accept loop exits once the
    /// receiving half resolves. Held in an `Option`, presumably so the
    /// shutdown path can take it out of `&mut self` — confirm in `stop`.
    shutdown: Option<oneshot::Sender<()>>,
    /// Join handle of the `revision-ingress` thread; the thread's result is
    /// the outcome of the serving loop.
    handle: Option<JoinHandle<Result<()>>>,
    /// Port the listener was started on (the requested port, or the free
    /// port found in its place).
    actual_port: u16,
    /// Shared state holding the [`ArcSwap`] activation slot. Kept here so
    /// [`reload`](Self::reload) can swap a new [`Activation`] in and
    /// [`counts`](Self::counts) can read the live snapshot.
    state: Arc<ServeState>,
    /// Handle to the listener thread's Tokio runtime. [`reload`](Self::reload)
    /// schedules the overlap-window drop of the previous activation on it so
    /// any async resources held by the old [`RunnerHost`] tear down on the
    /// same runtime that built them.
    runtime_handle: Handle,
    /// Serializes [`reload`](Self::reload) calls. Without it the
    /// `load_full(prev) → bump_generations → swap(new)` sequence is not
    /// atomic across concurrent producers: two reloads can both observe
    /// the same prev, both bump generations from it, then both swap — the
    /// second reload's generation bump is lost relative to the first's
    /// published activation, so cookies minted in the brief window the
    /// first reload was live still verify against the second's dispatcher.
    ///
    /// N2.2's file-watcher is a single producer today, but an admin HTTP
    /// reload signal (or any future second producer) would violate that
    /// invariant; guarding the swap primitive here means the type system
    /// cannot be tricked.
    reload_lock: std::sync::Mutex<()>,
    /// Per-deployment-id generation high-watermark, surviving across
    /// activations including ones that drop a deployment entirely.
    ///
    /// Without this map, a bump driven only by the previous dispatcher
    /// would miss deployments that disappeared from runtime-config: a
    /// remove → re-add sequence within cookie/pin TTL would mint a fresh
    /// dispatcher at the same generation the original served at, and
    /// cookies signed before the removal would still verify after the
    /// re-add. The watermark tombstones removed deployments so a re-added
    /// one always bumps past its prior generation.
    ///
    /// Updated by [`reload`](Self::reload) by absorbing both the previous
    /// and new activations on every swap. Initialized from the initial
    /// activation at [`start`](Self::start) so cookie invalidation works
    /// even on the very first reload after boot.
    generation_watermark: std::sync::Mutex<HashMap<DeploymentId, u64>>,
}

impl RevisionServer {
    /// Bind, spawn the serving thread, and return once the listener is up (or the
    /// bind failed). The requested port is bumped to the next free one if taken,
    /// matching the legacy ingress.
    ///
    /// The server runs on a dedicated `revision-ingress` OS thread that owns
    /// its own Tokio runtime. This call blocks until that thread reports the
    /// outcome of the bind.
    ///
    /// # Errors
    ///
    /// Returns an error when no free port can be found, when the thread
    /// cannot be spawned, when the thread's Tokio runtime cannot be created,
    /// when the listener fails to bind, or when the thread exits without
    /// reporting a startup result.
    pub(crate) fn start(config: RevisionServeConfig) -> Result<Self> {
        let requested_port = config.bind_addr.port();
        let listen_ip = config.bind_addr.ip();
        // NOTE(review): the trailing `10` is presumably the number of ports
        // probed past the requested one — confirm in `port_utils`.
        let actual_port =
            crate::port_utils::find_available_port(&listen_ip.to_string(), requested_port, 10)
                .context("failed to find available port for revision ingress")?;
        if actual_port != requested_port {
            operator_log::warn(
                module_path!(),
                format!(
                    "requested port {requested_port} is in use; using port {actual_port} instead"
                ),
            );
        }
        // The real bind happens later, on the serving thread. The port
        // probed above can still be lost in between; that case surfaces as a
        // bind error through the startup channel.
        let addr = SocketAddr::new(listen_ip, actual_port);

        let state = Arc::new(ServeState {
            slot: ArcSwap::new(config.activation),
            bound_addr: addr,
        });
        // Cloned into the listener thread; the original lives on as the
        // [`RevisionServer::state`] handle so [`reload`] / [`counts`] read the
        // same slot the running listener reads.
        let listener_state = Arc::clone(&state);

        // Shutdown signal: `tx` is kept by the returned server, `rx` is moved
        // into the accept loop.
        let (tx, rx) = oneshot::channel();
        // The startup channel ships the Tokio runtime handle alongside the
        // bind result so [`reload`] can schedule the overlap-window drop of
        // the previous activation on the listener thread's runtime — the same
        // runtime any held async resources were built on.
        let (startup_tx, startup_rx) = mpsc::channel::<Result<Handle>>();
        let handle = thread::Builder::new()
            .name("revision-ingress".to_string())
            .spawn(move || -> Result<()> {
                let runtime =
                    match Runtime::new().context("failed to create revision ingress runtime") {
                        Ok(runtime) => runtime,
                        Err(err) => {
                            // The error itself is returned as the thread's
                            // result, so the caller gets a rendered copy
                            // (`{:#}` keeps the context chain in the text).
                            let _ = startup_tx.send(Err(anyhow::anyhow!("{err:#}")));
                            return Err(err);
                        }
                    };
                let runtime_handle = runtime.handle().clone();
                runtime.block_on(async move {
                    let listener = match TcpListener::bind(addr)
                        .await
                        .context("failed to bind revision ingress listener")
                    {
                        Ok(listener) => listener,
                        Err(err) => {
                            // Same split as above: rendered copy to the
                            // caller, original error as the thread result.
                            let _ = startup_tx.send(Err(anyhow::anyhow!("{err:#}")));
                            return Err(err);
                        }
                    };
                    // Listener is up: unblock `start` and hand over the
                    // runtime handle.
                    let _ = startup_tx.send(Ok(runtime_handle));
                    operator_log::info(
                        module_path!(),
                        format!("revision ingress listening on http://{addr}"),
                    );
                    let mut shutdown = rx;
                    // Accept loop: runs until the shutdown receiver resolves.
                    loop {
                        tokio::select! {
                            _ = &mut shutdown => break,
                            accept = listener.accept() => match accept {
                                Ok((stream, peer)) => {
                                    let connection_state = listener_state.clone();
                                    // Caller-asserted identity (see `serve`) is only
                                    // honoured from loopback peers; capture it here.
                                    // `to_canonical` so an IPv4-mapped IPv6 peer
                                    // (`::ffff:127.0.0.1`, seen under an IPv6 bind)
                                    // still reads as loopback.
                                    let peer_is_loopback = peer.ip().to_canonical().is_loopback();
                                    // One task per connection; each request on
                                    // the connection gets its own clone of the
                                    // shared state.
                                    tokio::spawn(async move {
                                        let service = service_fn(move |req| {
                                            handle_connection(
                                                req,
                                                connection_state.clone(),
                                                peer_is_loopback,
                                            )
                                        });
                                        let io = TokioIo::new(stream);
                                        if let Err(err) =
                                            Http1Builder::new().serve_connection(io, service).await
                                        {
                                            operator_log::error(
                                                module_path!(),
                                                format!("revision ingress connection error: {err}"),
                                            );
                                        }
                                    });
                                }
                                // Accept failures are logged and the loop
                                // keeps going; they do not stop the server.
                                Err(err) => operator_log::error(
                                    module_path!(),
                                    format!("revision ingress accept error: {err}"),
                                ),
                            },
                        }
                    }
                    // Shutdown requested. Connection tasks spawned above are
                    // not awaited here.
                    Ok(())
                })
            })?;
        // Block until the thread reports. First `?`: the channel closed with
        // no message (the thread ended before reporting). Second `?`: the
        // thread reported a startup failure.
        let runtime_handle = startup_rx
            .recv()
            .context("failed to receive revision ingress startup result")??;

        // Seed the watermark from the initial activation so the very first
        // reload bumps generations off it — otherwise cookies signed against
        // the cold-start activation could survive a remove → re-add that
        // happens before any other reload has populated the watermark.
        let mut initial_watermark: HashMap<DeploymentId, u64> = HashMap::new();
        state
            .slot
            .load()
            .routing
            .dispatcher
            .absorb_into_watermark(&mut initial_watermark);

        Ok(Self {
            shutdown: Some(tx),
            handle: Some(handle),
            actual_port,
            state,
            runtime_handle,
            reload_lock: std::sync::Mutex::new(()),
            generation_watermark: std::sync::Mutex::new(initial_watermark),
        })
    }

    /// The port the server was started on. Equals the requested port unless
    /// that one was taken, in which case [`start`](Self::start) picked
    /// another free port and this returns that one.
    pub(crate) fn actual_port(&self) -> u16 {
        self.actual_port
    }

    /// Returns `(deployment_count, revision_count)` for the activation that
    /// is live right now. Both numbers come from one snapshot of the slot's
    /// dispatcher — the same source `/status` reads — so the startup banner
    /// and post-reload logging cannot disagree with `/status`.
    pub(crate) fn counts(&self) -> (usize, usize) {
        // Take the snapshot once so both counts describe the same activation.
        let live = self.state.slot.load();
        live.routing.dispatcher.counts()
    }

    /// Swap the live activation. Atomically replaces the slot so the next
    /// request reaches the new host + routing; every request that already
    /// snapshotted the previous activation (via [`ServeState::current`] at
    /// the top of [`serve`]) keeps running against it for the rest of its
    /// lifetime.
    ///
    /// The previous activation is held alive for `drain_window` on the
    /// listener thread's runtime so async resources owned by the old
    /// [`RunnerHost`] (timer-handle aborts, Redis connection manager drops,
    /// telemetry exporters) tear down on the same runtime that built them,
    /// not on a bare OS thread. After the window, the Arc is dropped — if no
    /// in-flight request still pins it, the host and its [`TenantRuntime`]s
    /// drop on the spot; otherwise the drop is deferred until the last
    /// request completes.
    ///
    /// This is the swap primitive the N2.2 file-watcher + reload signal
    /// producer calls. A `drain_window` of zero drops the previous
    /// activation immediately (only safe in tests, where the producer
    /// controls request scheduling).
    ///
    /// Per-deployment dispatcher generations are bumped against a
    /// server-level high-watermark BEFORE the swap (see
    /// [`crate::revision_dispatcher::RevisionDispatcher::bump_generations_from_watermark`]
    /// and [`Self::generation_watermark`]) so any stickiness cookie or
    /// session pin minted against an earlier activation is invalidated and
    /// the next request re-picks under the new traffic split. The
    /// watermark tracks every deployment id this server has ever seen,
    /// including ones that have been removed and re-added — so a
    /// remove → re-add rollback within cookie/pin TTL doesn't leak
    /// stickiness from before the removal.
    ///
    /// Holds the [`reload_lock`](Self::reload_lock) for the whole sequence
    /// so concurrent producers (file-watcher + admin signal) cannot race
    /// the `load_full(prev) → bump_generations → swap(new)` steps and
    /// lose a generation bump.
    ///
    /// Returns a [`ReloadReport`] carrying the deployment / revision counts
    /// of the previous and the newly published dispatcher.
    ///
    /// # Panics
    ///
    /// Panics if the reload lock or the generation-watermark lock is
    /// poisoned. In builds with debug assertions enabled it also panics when
    /// `new` shares its dispatcher `Arc` with the currently live activation.
    pub(crate) fn reload(&self, new: Activation, drain_window: Duration) -> ReloadReport {
        // Serialize concurrent reloads so the load_full + bump_generations
        // + swap sequence is atomic relative to other producers. See the
        // field doc on `reload_lock`.
        let _reload_guard = self.reload_lock.lock().expect("reload lock poisoned");
        // Wrap the incoming activation once; this is the exact pointer that
        // gets published by `swap` below.
        let new_arc = Arc::new(new);
        // Snapshot the previous activation BEFORE publishing the new one so
        // the dispatcher generation bump runs against a stable reference.
        // `swap` would also return the prev pointer atomically with the
        // store, but doing the bump first means we publish a dispatcher
        // whose generations are already correct for the very first
        // dispatch under the new activation.
        let prev = self.state.slot.load_full();
        // `SlotLivenessProbe` (the drain path's stale-eviction guard) relies
        // on every reload publishing a freshly-built dispatcher `Arc`, so it
        // can use `Arc::ptr_eq` to tell the OLD dispatcher apart from the
        // live one. Assert that invariant here: if a future optimization ever
        // reuses a dispatcher `Arc` across reloads, this fails loudly in tests
        // rather than silently breaking eviction telemetry.
        debug_assert!(
            !Arc::ptr_eq(&prev.routing.dispatcher, &new_arc.routing.dispatcher),
            "reload must build a fresh dispatcher Arc (SlotLivenessProbe ptr_eq guard depends on it)"
        );
        // Update the generation watermark and bump the new dispatcher off
        // it. Absorbing prev → bump new → absorb new keeps the watermark
        // strictly monotonic across every deployment id we've ever served
        // (including ids that have been removed), so a re-introduced id
        // always lands at a generation strictly greater than any cookie/pin
        // could still be holding.
        {
            // Scoped so the watermark guard is released before the swap and
            // the drain-task spawns below.
            let mut watermark = self
                .generation_watermark
                .lock()
                .expect("generation watermark lock poisoned");
            prev.routing
                .dispatcher
                .absorb_into_watermark(&mut watermark);
            new_arc
                .routing
                .dispatcher
                .bump_generations_from_watermark(&watermark);
            new_arc
                .routing
                .dispatcher
                .absorb_into_watermark(&mut watermark);
        }
        // Diff OLD vs NEW revision sets BEFORE the swap, so the drain
        // coordinator (below) runs against a stable snapshot of "what was
        // serving until now" — independent of the publish ordering.
        let removed = removed_revisions(&prev.routing.dispatcher, &new_arc.routing.dispatcher);
        // Read the new counts now: `new_arc` is moved into `swap` on the
        // next line and is no longer reachable through this binding after.
        let (new_deployments, new_revisions) = new_arc.routing.dispatcher.counts();
        // Publish. `prev` is re-bound to whatever pointer `swap` hands back,
        // and the "previous" counts in the report are taken from that value.
        let prev = self.state.slot.swap(new_arc);
        let (prev_deployments, prev_revisions) = prev.routing.dispatcher.counts();
        // Fire one drain coordinator per removed revision against the OLD
        // activation. The coordinator marks the revision draining on OLD's
        // dispatcher (cookie/pin holders re-dispatch immediately), waits
        // `drain_window`, then evicts it from OLD's routing table — emitting
        // `RolloutEvent::RevisionDraining` + `RevisionEvicted` along the way.
        // The teardown is a no-op: the OLD activation drops wholesale at the
        // bottom of this fn after `drain_window`, taking the `RunnerHost`'s
        // `ActivePacks` with it. A real `ActivePacks::remove_revision` adapter
        // is the Phase D follow-up (see `revision_drain` module docs).
        if !removed.is_empty() && !drain_window.is_zero() {
            spawn_revision_drains(
                &self.runtime_handle,
                Arc::clone(&self.state),
                Arc::clone(&prev),
                removed,
                drain_window,
            );
        }
        // Release this function's reference to the previous activation:
        // right here for a zero window, otherwise from a task spawned on
        // `runtime_handle` that sleeps for `drain_window` first.
        if drain_window.is_zero() {
            drop(prev);
        } else {
            self.runtime_handle.spawn(async move {
                tokio::time::sleep(drain_window).await;
                drop(prev);
            });
        }
        ReloadReport {
            prev_deployments,
            prev_revisions,
            new_deployments,
            new_revisions,
        }
    }

    /// Signal shutdown and join the serving thread.
    ///
    /// The shutdown sender and the join handle are both taken out of `self`,
    /// so each is used at most once. A failed send is deliberately ignored:
    /// the join below is what reports how the serving thread ended.
    ///
    /// # Errors
    ///
    /// Returns an error when the serving thread panicked — the panic message
    /// is included whenever the payload is a `&str` or a `String`, which is
    /// what `panic!` produces — and otherwise propagates the error the
    /// serving thread itself returned.
    pub(crate) fn stop(mut self) -> Result<()> {
        if let Some(tx) = self.shutdown.take() {
            let _ = tx.send(());
        }
        if let Some(handle) = self.handle.take() {
            let thread_result = handle.join().map_err(|payload| {
                // A panic payload is a `Box<dyn Any + Send>`, whose `Debug`
                // output is only `Any { .. }`. Downcast to recover the
                // actual panic message for the operator.
                let message = payload
                    .downcast_ref::<&str>()
                    .copied()
                    .or_else(|| payload.downcast_ref::<String>().map(String::as_str))
                    .unwrap_or("<non-string panic payload>");
                anyhow::anyhow!("revision ingress server panicked: {message}")
            })?;
            thread_result?;
        }
        Ok(())
    }
}

/// `service_fn` adapter around [`serve`].
///
/// `serve` yields a ready HTTP response on both its `Ok` and its `Err` side;
/// hyper wants a single infallible response, so whichever side was produced
/// is returned as-is.
async fn handle_connection(
    req: Request<Incoming>,
    state: Arc<ServeState>,
    peer_is_loopback: bool,
) -> Result<Response<Full<Bytes>>, Infallible> {
    let response = match serve(req, state, peer_is_loopback).await {
        Ok(response) | Err(response) => response,
    };
    Ok(response)
}

/// Resolve → dispatch → execute for a single request. `Err` carries a ready HTTP
/// error response; it is never a fall-through to any other handler.
///
/// Steps, in order:
/// 1. Probe paths are answered immediately (see [`try_probe_response`]).
/// 2. The live activation is snapshotted once and used for everything below.
/// 3. Host + path are resolved to a deployment and tenant (`404` if unbound).
/// 4. The body is read under the size cap (`413`) and parsed as JSON (`400`);
///    an empty body becomes `Value::Null`.
/// 5. The dispatcher picks a revision (`500` on failure).
/// 6. [`admit_request`] gates execution (`501` for provider routes, `405` for
///    non-POST).
/// 7. The activity runs on the activation's host (`500` on failure) and the
///    replies are returned as a JSON `200`, plus the stickiness `Set-Cookie`
///    when the dispatch outcome carries one.
async fn serve(
    req: Request<Incoming>,
    state: Arc<ServeState>,
    peer_is_loopback: bool,
) -> Result<Response<Full<Bytes>>, Response<Full<Bytes>>> {
    // Copy method and path out of the request up front: `req` is consumed
    // later when its body is read.
    let method = req.method().clone();
    let path = req.uri().path().to_string();

    if let Some(response) = try_probe_response(&path, &state) {
        return Ok(response);
    }

    // Snapshot the activation ONCE per request so dispatch and execute see a
    // coherent (host, routing) pair. A concurrent [`RevisionServer::reload`]
    // swap is observed by the *next* request; this one keeps running against
    // the activation it pinned here.
    let activation = state.current();

    // Header values are copied to owned strings here, before `req` is moved
    // into the body reader.
    let host_header = header_str(req.headers(), header::HOST.as_str());
    let cookie_header = header_str(req.headers(), header::COOKIE.as_str());
    let user_header = header_str(req.headers(), "x-greentic-user");
    let session_header = header_str(req.headers(), "x-greentic-session");
    let endpoint_header = header_str(req.headers(), "x-greentic-messaging-endpoint-id");

    // Resolve the bound deployment + tenant before touching the body, so an
    // unroutable request is rejected cheaply.
    let (deployment_id, tenant) = activation
        .routing
        .deployment_routes
        .resolve(host_header.as_deref(), &path)
        .map(|(deployment_id, tenant)| (deployment_id, tenant.to_string()))
        .ok_or_else(|| {
            error_response(
                StatusCode::NOT_FOUND,
                "no deployment is bound to this host and path",
            )
        })?;

    // Any body-read failure (over the cap or a stream error) is reported as
    // 413; see `read_body_limited`.
    let body_bytes = read_body_limited(req).await.map_err(|_| {
        error_response(
            StatusCode::PAYLOAD_TOO_LARGE,
            "request body exceeds the size limit",
        )
    })?;
    // An empty body is accepted and treated as JSON `null`; anything else
    // must parse as JSON.
    let payload: Value = if body_bytes.is_empty() {
        Value::Null
    } else {
        serde_json::from_slice(&body_bytes)
            .map_err(|_| error_response(StatusCode::BAD_REQUEST, "request body must be JSON"))?
    };

    // Caller-asserted identity is only honoured from loopback peers (see
    // `caller_identity`). The session hint both pins the revision (stickiness)
    // and keys the flow session, so it feeds the dispatcher and the activity.
    // The messaging endpoint id (M1.4) partitions sessions/telemetry per
    // provider instance — header-only, never from the body.
    let (user, session_hint, endpoint_id) = caller_identity(
        peer_is_loopback,
        user_header,
        session_header,
        endpoint_header,
        &payload,
    );
    // Pull this deployment's stickiness cookie (if any) out of the jar.
    let cookie_value = cookie_header
        .as_deref()
        .and_then(|jar| read_cookie(jar, &cookie_name(deployment_id)));

    let dispatch_req = DispatchRequest {
        env_id: activation.routing.dispatcher.env_id(),
        tenant: &tenant,
        deployment_id,
        session_hint: session_hint.as_deref(),
        // Public client traffic is never trusted: the header-pinned revision
        // override is a debug-only affordance.
        trusted: false,
        header_revision: None,
        cookie: cookie_value.as_deref(),
    };
    // `ThreadRng` is `!Send` and the dispatcher is async, so it cannot survive
    // the `.await` in the spawned connection task. Seed a `Send` `SmallRng`.
    let mut rng: rand::rngs::SmallRng = rand::make_rng();
    let outcome = activation
        .routing
        .dispatcher
        .dispatch(&dispatch_req, &mut rng)
        .await
        .map_err(|err| {
            // Full error detail goes to the operator log; the client only
            // sees a generic 500 message.
            operator_log::warn(
                module_path!(),
                format!("revision dispatch for deployment {deployment_id} failed: {err:#}"),
            );
            error_response(
                StatusCode::INTERNAL_SERVER_ERROR,
                "revision dispatch failed",
            )
        })?;

    // Gate before executing: refuse provider webhook paths (deferred) and
    // non-POST generic requests, rather than running the entry flow for them.
    let scope = RevisionScope {
        deployment_id,
        bundle_id: outcome.bundle_id.clone(),
        revision_id: outcome.revision_id,
    };
    match admit_request(&activation.routing.http_routes, &scope, &path, &method) {
        Admission::ProviderRoute => {
            return Err(error_response(
                StatusCode::NOT_IMPLEMENTED,
                "this path is a provider ingress route; revision-aware provider serving is not \
                 yet implemented (use the legacy --bundle ingress)",
            ));
        }
        Admission::MethodNotAllowed => {
            return Err(error_response(
                StatusCode::METHOD_NOT_ALLOWED,
                "only POST is supported for the generic revision ingress",
            ));
        }
        Admission::Serve => {}
    }

    let activity = build_activity(
        &payload,
        &tenant,
        user.as_deref(),
        session_hint.as_deref(),
        endpoint_id.as_deref(),
    );

    // Execute on the host of the activation pinned above, against the
    // revision the dispatcher selected.
    let replies = activation
        .host
        .handle_activity_for_revision(
            &tenant,
            deployment_id,
            outcome.bundle_id.clone(),
            outcome.revision_id,
            activity,
        )
        .await
        .map_err(|err| {
            operator_log::error(
                module_path!(),
                format!(
                    "revision execution failed for deployment {deployment_id} revision {}: {err:#}",
                    outcome.revision_id
                ),
            );
            error_response(StatusCode::INTERNAL_SERVER_ERROR, "flow execution failed")
        })?;

    let body = serde_json::to_vec(&replies)
        .map_err(|err| error_response(StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?;
    let mut response = json_response(StatusCode::OK, body);
    // The cookie is only attached on this success path; every early error
    // return above leaves without one.
    if let Some(directive) = outcome.set_cookie {
        apply_set_cookie(&mut response, &directive);
    }
    Ok(response)
}

/// Turn a generic JSON request body into a canonical [`Activity`].
///
/// A top-level string `text` field yields a messaging (text) activity; any
/// other payload is wrapped whole as a custom `http.request` activity. The
/// tenant is always stamped; user, session and messaging endpoint are stamped
/// only when supplied. No `flow_id` is set here, which leaves the runtime to
/// route the activity to the pack's entry flow.
fn build_activity(
    payload: &Value,
    tenant: &str,
    user: Option<&str>,
    session: Option<&str>,
    endpoint: Option<&str>,
) -> Activity {
    let text = payload.get("text").and_then(Value::as_str);
    let seed = if let Some(text) = text {
        Activity::text(text)
    } else {
        Activity::custom("http.request", payload.clone())
    };

    let activity = seed.with_tenant(tenant);
    let activity = match user {
        Some(user) => activity.from_user(user),
        None => activity,
    };
    let activity = match session {
        Some(session) => activity.with_session(session),
        None => activity,
    };
    match endpoint {
        Some(endpoint) => activity.with_messaging_endpoint(endpoint),
        None => activity,
    }
}

/// Work out the caller-asserted `(user, session, endpoint)` tuple. It is
/// honoured **only for loopback peers**; for `user` and `session` a header
/// value beats the body field of the same name.
///
/// Why loopback only: this path has no authentication. The legacy
/// webchat/DirectLine ingress also derives identity from the unauthenticated
/// client request, so on loopback the posture is unchanged. A non-loopback
/// caller, however, must not be able to claim another user's identity (which
/// would let it resume that user's waiting flow or key its session), nor
/// poison revision stickiness with a session hint of its choosing. Remote
/// callers therefore run anonymously with no session hint; the HMAC-signed
/// stickiness cookie, which they cannot forge, still works. A verified
/// provider/DirectLine token is the Phase-D upgrade.
///
/// The messaging endpoint id (M1.4) is **header-only** — the body is never
/// consulted for it, even on loopback. It is an operational routing decision
/// (which provider instance owns the request) that partitions
/// sessions/telemetry per endpoint; taking it from the attacker-controlled
/// payload would let a body-supplied id route a request to the wrong endpoint
/// and pin the wrong session. The header value must also pass
/// [`validate_endpoint_id`].
fn caller_identity(
    peer_is_loopback: bool,
    user_header: Option<String>,
    session_header: Option<String>,
    endpoint_header: Option<String>,
    payload: &Value,
) -> (Option<String>, Option<String>, Option<String>) {
    if peer_is_loopback {
        let mut user = user_header;
        if user.is_none() {
            user = str_field(payload, "user");
        }

        let mut session = session_header;
        if session.is_none() {
            session = str_field(payload, "session");
        }

        let endpoint = endpoint_header.and_then(validate_endpoint_id);
        (user, session, endpoint)
    } else {
        // Remote peer: nothing it asserted is trusted.
        (None, None, None)
    }
}

/// Accept a producer-asserted messaging endpoint id only when it is an ASCII
/// identifier of the form `[A-Za-z0-9_.-]{1,128}`; return `None` otherwise.
///
/// That grammar covers both the M1.2 ULID form and a hand-typeable slug such
/// as `teams-legal`. A rejected value makes the runner run unscoped rather
/// than partition into a corrupt bucket. The input is not trimmed: a producer
/// that sends stray whitespace has a bug that should surface, not be masked.
///
/// The check protects the canonicalize-layer `ep=<eid>::<base>` session
/// prefix:
/// * an empty value (e.g. a bare `X-Greentic-Messaging-Endpoint-Id:` header)
///   would fold all malformed-header traffic into a single `ep=::<base>`
///   namespace and lose endpoint isolation;
/// * a value containing `:` (the prefix delimiter) would collide with other
///   endpoint/base pairs — `eid="a"+base="b::c"` and `eid="a::b"+base="c"`
///   both yield `ep=a::b::c`;
/// * control characters or unbounded length would corrupt downstream
///   session-store keys and telemetry attribute values.
fn validate_endpoint_id(raw: String) -> Option<String> {
    const MAX_LEN: usize = 128;

    // Length is tested first so an oversized value is rejected without
    // scanning it.
    let acceptable = (1..=MAX_LEN).contains(&raw.len())
        && raw
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'));

    acceptable.then_some(raw)
}

/// Pre-execution admission decision for a dispatched revision request.
///
/// Produced by [`admit_request`] and consumed by [`serve`], which maps
/// `ProviderRoute` to `501 Not Implemented`, `MethodNotAllowed` to
/// `405 Method Not Allowed`, and lets `Serve` fall through to execution.
#[derive(Debug, PartialEq, Eq)]
enum Admission {
    /// Run the generic entry-flow activity.
    Serve,
    /// The path matches a declared provider ingress route for this revision —
    /// deferred (serving it generically would skip provider signature/token
    /// verification), so it is refused.
    ProviderRoute,
    /// A non-POST request to a generic (non-provider) path.
    MethodNotAllowed,
}

/// Classify a dispatched request before the generic entry flow is run.
///
/// The provider-route check comes first and is independent of the method: a
/// path that matches a provider ingress route for this revision is always
/// refused. Of the remaining requests only `POST` is admitted, so that e.g. a
/// browser `GET /favicon.ico` under a broad `/` binding never executes the
/// flow.
fn admit_request(
    routes: &HttpRouteTable,
    scope: &RevisionScope,
    path: &str,
    method: &hyper::Method,
) -> Admission {
    let is_provider_route = routes
        .match_request_for_revision(path, method.as_str(), scope)
        .is_some();

    if is_provider_route {
        Admission::ProviderRoute
    } else if method != hyper::Method::POST {
        Admission::MethodNotAllowed
    } else {
        Admission::Serve
    }
}

/// Collect the whole request body, capped at `MAX_BODY_BYTES`.
///
/// `Err(())` is returned when the cap is exceeded and also when the body
/// stream itself fails; the caller reports either case as `413`.
async fn read_body_limited(req: Request<Incoming>) -> Result<Bytes, ()> {
    let capped = Limited::new(req.into_body(), MAX_BODY_BYTES);
    match capped.collect().await {
        Ok(collected) => Ok(collected.to_bytes()),
        Err(_) => Err(()),
    }
}

/// Return the value of header `name` as an owned `String`.
///
/// Yields `None` when the header is absent or when its value cannot be
/// rendered as a string by `HeaderValue::to_str`.
fn header_str(headers: &header::HeaderMap, name: &str) -> Option<String> {
    let value = headers.get(name)?;
    let text = value.to_str().ok()?;
    Some(text.to_owned())
}

/// Read a top-level string field from a JSON object body.
fn str_field(payload: &Value, key: &str) -> Option<String> {
    payload
        .get(key)
        .and_then(Value::as_str)
        .map(|value| value.to_string())
}

/// Find the cookie called `name` in a `Cookie` header value and return its
/// value.
///
/// RFC 6265 allows several `name=value` pairs separated by `; `. Pairs are
/// split on `;`, each pair on its first `=`, and both key and value are
/// trimmed. Segments without an `=` are skipped; the first matching pair
/// wins.
fn read_cookie(jar: &str, name: &str) -> Option<String> {
    for segment in jar.split(';') {
        let Some((key, value)) = segment.split_once('=') else {
            continue;
        };
        if key.trim() == name {
            return Some(value.trim().to_owned());
        }
    }
    None
}

/// Append the revision stickiness `Set-Cookie` header to `response`.
///
/// The cookie attributes (`Path`, `Secure`, `HttpOnly`, `SameSite`) are an
/// ingress concern, so they are stamped here rather than by the dispatcher.
/// If the rendered directive is not a valid header value the cookie is
/// skipped and a warning is logged.
fn apply_set_cookie(response: &mut Response<Full<Bytes>>, directive: &SetCookieDirective) {
    let rendered = directive.to_header_value();
    let value = match header::HeaderValue::from_str(&rendered) {
        Ok(value) => value,
        Err(err) => {
            // The value is base64 (URL_SAFE_NO_PAD) so this should never
            // fire; log rather than silently drop the stickiness cookie.
            operator_log::warn(
                module_path!(),
                format!(
                    "failed to encode revision Set-Cookie `{}`: {err}",
                    directive.name
                ),
            );
            return;
        }
    };
    response.headers_mut().append(header::SET_COOKIE, value);
}

/// Build a response with the given status, an `application/json` content
/// type, and `body` as the already-serialized payload.
fn json_response(status: StatusCode, body: Vec<u8>) -> Response<Full<Bytes>> {
    let payload = Full::new(Bytes::from(body));
    let builder = Response::builder()
        .status(status)
        .header(header::CONTENT_TYPE, "application/json");
    builder
        .body(payload)
        .expect("static response builder inputs are valid")
}

/// Build a response with the given status, a `text/plain; charset=utf-8`
/// content type, and a copy of `body` as the payload.
fn text_response(status: StatusCode, body: &str) -> Response<Full<Bytes>> {
    let payload = Full::new(Bytes::from(body.to_owned()));
    let builder = Response::builder()
        .status(status)
        .header(header::CONTENT_TYPE, "text/plain; charset=utf-8");
    builder
        .body(payload)
        .expect("static response builder inputs are valid")
}

/// Build a plain-text error response; a thin wrapper over [`text_response`]
/// that accepts any string-like message.
fn error_response(status: StatusCode, message: impl AsRef<str>) -> Response<Full<Bytes>> {
    let text: &str = message.as_ref();
    text_response(status, text)
}

/// Answer health and status probes.
///
/// `/livez`, `/readyz`, `/healthz` and `/health` get a plain `200 ok`.
/// `/status` gets the diagnostics JSON, built from a snapshot of the current
/// activation. Any other path yields `None`, so the caller carries on with
/// normal routing.
fn try_probe_response(path: &str, state: &ServeState) -> Option<Response<Full<Bytes>>> {
    match path {
        "/livez" | "/readyz" | "/healthz" | "/health" => {
            Some(text_response(StatusCode::OK, "ok"))
        }
        "/status" => {
            let activation = state.current();
            let (deployments_routed, revisions_active) = activation.routing.dispatcher.counts();
            let body = serde_json::json!({
                "schema": "greentic.status.v1",
                "env_id": activation.routing.dispatcher.env_id(),
                "listen_addr": state.bound_addr.to_string(),
                "bundles_active": activation.routing.deployment_routes.len(),
                "deployments_routed": deployments_routed,
                "revisions_active": revisions_active,
            });
            Some(json_response(StatusCode::OK, body.to_string().into_bytes()))
        }
        _ => None,
    }
}

/// Work out the address the revision ingress should bind.
///
/// Layers are applied from lowest to highest precedence; each later layer
/// overrides the earlier ones:
/// 1. The spec default ([`DEFAULT_LISTEN_ADDR`], `127.0.0.1:8080`).
/// 2. The persisted `host_config.listen_addr` (set by `op env init` /
///    `op config set listen_addr`).
/// 3. `GREENTIC_GATEWAY_LISTEN_ADDR` — either a full `SocketAddr`
///    (`0.0.0.0:9090`) or a bare `IpAddr` (`0.0.0.0`); with the bare-IP form
///    the port from layer (1) or (2) is kept.
/// 4. `PORT` — a port-only override, following the convention used by
///    Heroku / Cloud Run / Fly and the rest of the gateway configuration.
///
/// Operators set `host_config.listen_addr` once at env init; the env-vars
/// remain available for ad-hoc overrides (CI ports, local debugging) without
/// rewriting the env file. An unset, empty or whitespace-only env-var is
/// ignored silently; an unparseable one is ignored with a warning.
pub(crate) fn resolve_bind_addr(host_config: Option<&EnvironmentHostConfig>) -> SocketAddr {
    let mut addr = match host_config {
        Some(config) => EnvironmentHostConfig::resolved_listen_addr(config),
        None => DEFAULT_LISTEN_ADDR,
    };

    // Empty / whitespace-only is treated as unset — many deployment systems
    // expose env-vars as empty strings to mean "use default"; a warning here
    // would be noise. An unreadable variable collapses to the same case.
    let raw_listen = std::env::var("GREENTIC_GATEWAY_LISTEN_ADDR").unwrap_or_default();
    let trimmed = raw_listen.trim();
    if !trimmed.is_empty() {
        match trimmed.parse::<SocketAddr>() {
            Ok(socket) => addr = socket,
            Err(_) => match trimmed.parse::<IpAddr>() {
                // Bare IP: keep whatever port the earlier layers chose.
                Ok(ip) => addr = SocketAddr::new(ip, addr.port()),
                Err(_) => {
                    operator_log::warn(
                        module_path!(),
                        format!(
                            "GREENTIC_GATEWAY_LISTEN_ADDR={trimmed:?} is not a valid SocketAddr or IP; \
                         falling back to {addr}"
                        ),
                    );
                }
            },
        }
    }

    let raw_port = std::env::var("PORT").unwrap_or_default();
    let trimmed = raw_port.trim();
    if !trimmed.is_empty() {
        match trimmed.parse::<u16>() {
            Ok(port) => addr.set_port(port),
            Err(_) => {
                operator_log::warn(
                    module_path!(),
                    format!(
                        "PORT={trimmed:?} is not a valid u16; keeping port {}",
                        addr.port()
                    ),
                );
            }
        }
    }

    addr
}

#[cfg(test)]
mod tests {
    use super::*;
    // `BundleId` is used only in tests (prod refers to it via the `RevisionKey`
    // alias), so it lives here rather than in the library import set.
    use greentic_deploy_spec::ids::BundleId;
    use greentic_runner_host::engine::runtime::{FlowResumeStore, IngressEnvelope};
    use greentic_runner_host::runner::engine::{ExecutionState, FlowSnapshot, FlowWait};
    use greentic_runner_host::storage::new_session_store;
    use greentic_types::ReplyScope;
    use serde_json::json;

    /// A body with a `text` field yields a messaging-typed activity that
    /// carries the tenant, user, session and text through.
    #[test]
    fn build_activity_text_field_becomes_messaging_activity() {
        let body = json!({ "text": "hello there" });
        let built = build_activity(&body, "acme", Some("u1"), Some("s1"), None);

        assert_eq!(built.tenant(), Some("acme"));
        assert_eq!(built.user(), Some("u1"));
        assert_eq!(built.session_id(), Some("s1"));
        assert_eq!(built.flow_type(), Some("messaging"));

        let text = built.payload().get("text").and_then(Value::as_str);
        assert_eq!(text, Some("hello there"));
    }

    /// Without a `text` field the body is passed along untouched, and absent
    /// user / session stay absent.
    #[test]
    fn build_activity_without_text_wraps_generic_payload() {
        let body = json!({ "kind": "ping", "n": 7 });
        let built = build_activity(&body, "acme", None, None, None);

        assert_eq!(built.tenant(), Some("acme"));
        assert!(built.user().is_none());
        assert!(built.session_id().is_none());
        // The whole body is preserved for the entry flow to interpret.
        assert_eq!(built.payload(), &body);
    }

    /// A `null` body still produces an activity for the tenant, with a
    /// `null` payload.
    #[test]
    fn build_activity_empty_body_is_a_null_custom_activity() {
        let empty_body = Value::Null;
        let built = build_activity(&empty_body, "acme", None, None, None);
        assert_eq!(built.tenant(), Some("acme"));
        assert_eq!(built.payload(), &empty_body);
    }

    /// The endpoint id handed to `build_activity` shows up in the serialized
    /// activity under `messaging_endpoint_id`.
    #[test]
    fn build_activity_plumbs_messaging_endpoint_id() {
        let body = json!({ "text": "hello" });
        let endpoint = Some("teams-legal");
        let built = build_activity(&body, "acme", Some("u1"), Some("s1"), endpoint);

        // Serialize to wire form to prove the field rides on the Activity —
        // there's no public accessor returning Option<&str> for the endpoint,
        // and the runner reads it through the same serde shape.
        let wire = serde_json::to_value(&built).expect("serialize");
        let on_wire = wire.get("messaging_endpoint_id").and_then(Value::as_str);
        assert_eq!(on_wire, endpoint);
    }

    /// `read_cookie` finds the requested name among several pairs (tolerating
    /// the stray space after the value) and reports `None` for an unknown name.
    #[test]
    fn read_cookie_picks_the_named_pair() {
        let header = "foo=1; _gt_rev_abc=xyz ; bar=2";
        let hit = read_cookie(header, "_gt_rev_abc");
        let miss = read_cookie(header, "missing");
        assert_eq!(hit.as_deref(), Some("xyz"));
        assert!(miss.is_none());
    }

    /// Client-asserted identity counts only when the caller is on loopback.
    #[test]
    fn caller_identity_is_honoured_only_from_loopback() {
        let body = json!({ "user": "body-user", "session": "body-session" });

        // Loopback: header wins over body, body fills the rest.
        let (local_user, local_session, local_endpoint) =
            caller_identity(true, Some("hdr-user".into()), None, None, &body);
        assert_eq!(local_user.as_deref(), Some("hdr-user"));
        assert_eq!(local_session.as_deref(), Some("body-session"));
        assert!(local_endpoint.is_none());

        // Non-loopback: client-asserted identity is dropped entirely so a remote
        // caller cannot impersonate a user/session or pin a chosen revision.
        let (remote_user, remote_session, remote_endpoint) = caller_identity(
            false,
            Some("hdr-user".into()),
            Some("hdr-session".into()),
            Some("teams-legal".into()),
            &body,
        );
        assert!(remote_user.is_none());
        assert!(remote_session.is_none());
        assert!(remote_endpoint.is_none());
    }

    /// A loopback caller's endpoint header is returned as the endpoint id.
    #[test]
    fn caller_identity_returns_messaging_endpoint_from_loopback_header() {
        let body = json!({});
        let identity = caller_identity(true, None, None, Some("teams-legal".into()), &body);
        let (_, _, endpoint) = identity;
        assert_eq!(endpoint.as_deref(), Some("teams-legal"));
    }

    /// The endpoint id comes from the header only; the body is never consulted.
    #[test]
    fn caller_identity_never_reads_messaging_endpoint_from_body() {
        // Even on loopback, a body-supplied endpoint id must NOT be honoured.
        // Endpoint id is an operational routing decision, not user-asserted
        // identity; reading it from the payload would let an attacker pin a
        // chosen endpoint and partition into the wrong session bucket.
        let body = json!({ "messaging_endpoint_id": "teams-attacker" });
        let identity = caller_identity(true, None, None, None, &body);
        let (_, _, endpoint) = identity;
        assert!(endpoint.is_none());
    }

    /// Off loopback, even a header-supplied endpoint id is discarded.
    #[test]
    fn caller_identity_drops_messaging_endpoint_on_non_loopback() {
        // A remote caller cannot pin an endpoint id even via the header. The
        // verified-token Phase-D upgrade is the only way for remote ingress to
        // assert endpoint membership.
        let body = json!({});
        let identity = caller_identity(false, None, None, Some("teams-legal".into()), &body);
        let (_, _, endpoint) = identity;
        assert!(endpoint.is_none());
    }

    /// Malformed endpoint headers from loopback produce no endpoint at all.
    #[test]
    fn caller_identity_silently_drops_malformed_endpoint_header() {
        // A loopback caller asserting a malformed endpoint id (empty, contains
        // the `:` prefix delimiter, control chars, over-length, etc.) gets the
        // unscoped path — never the `ep=::<base>` collapse or a colliding
        // `ep=a::b::c` bucket the runner cannot disambiguate.
        let body = json!({});
        let malformed = ["", "   ", "legal::accounting", "teams legal", "teams\n"];
        for raw in malformed {
            let identity = caller_identity(true, None, None, Some(raw.into()), &body);
            let (_, _, endpoint) = identity;
            assert!(
                endpoint.is_none(),
                "header value {raw:?} should be rejected"
            );
        }
    }

    /// Well-formed ids — two slug shapes and a ULID — come back unchanged.
    #[test]
    fn validate_endpoint_id_accepts_slug_and_ulid_forms() {
        // ULID (Crockford base32, 26 chars) — the M1.2 on-disk form.
        let ulid = "01HV3ZQXW8K0YBN8FXZ7P4M2R5";
        for accepted in ["teams-legal", "teams_legal.v2", ulid] {
            assert_eq!(
                validate_endpoint_id(accepted.into()),
                Some(accepted.into())
            );
        }
    }

    /// Empty, whitespace-only and whitespace-padded values are all refused.
    #[test]
    fn validate_endpoint_id_rejects_empty_and_surrounding_whitespace() {
        // Empty header value fails the explicit empty check; surrounding
        // whitespace fails the grammar check (no trim, see fn docs).
        let rejected = ["", "   ", "\t\n", "  teams-legal  "];
        for raw in rejected {
            let outcome = validate_endpoint_id(raw.into());
            assert!(outcome.is_none(), "{raw:?} should reject");
        }
    }

    /// Any colon in the id is refused.
    #[test]
    fn validate_endpoint_id_rejects_prefix_delimiter() {
        // `:` is the `ep=<eid>::<base>` delimiter; any colon in eid would
        // make `ep=a::b::c` ambiguous (eid="a"+base="b::c" vs eid="a::b"+base="c").
        let rejected = ["legal::accounting", "foo:bar"];
        for raw in rejected {
            let outcome = validate_endpoint_id(raw.into());
            assert!(outcome.is_none(), "{raw:?} should reject");
        }
    }

    /// Control characters, spaces, slashes and non-ASCII text are refused.
    #[test]
    fn validate_endpoint_id_rejects_control_chars_and_non_ascii() {
        let rejected = [
            "teams\nlegal",
            "teams\0legal",
            "teams legal", // space
            "teams/legal",
            "команда", // non-ASCII
        ];
        for raw in rejected {
            let outcome = validate_endpoint_id(raw.into());
            assert!(outcome.is_none(), "{raw:?} should reject");
        }
    }

    /// 128 characters is accepted; 129 is refused.
    #[test]
    fn validate_endpoint_id_rejects_over_length() {
        let at_limit = "a".repeat(128);
        let over_limit = "a".repeat(129);
        assert!(validate_endpoint_id(over_limit).is_none());
        assert_eq!(validate_endpoint_id(at_limit.clone()), Some(at_limit));
    }

    /// Route table holding a single messaging-domain descriptor:
    /// `POST /slack/events`, attached to `scope`.
    fn provider_route_table(scope: &RevisionScope) -> HttpRouteTable {
        use crate::domains::Domain;
        let slack_events = crate::http_routes::descriptor_for_test(
            "/slack/events",
            &["POST"],
            Domain::Messaging,
            Some(scope.clone()),
        );
        HttpRouteTable::from_descriptors(vec![slack_events])
    }

    /// A `RevisionScope` for bundle `fast2flow` with newly created deployment
    /// and revision ids.
    fn test_scope() -> RevisionScope {
        let deployment_id = greentic_deploy_spec::DeploymentId::new();
        let bundle_id = greentic_deploy_spec::BundleId::new("fast2flow");
        let revision_id = greentic_deploy_spec::RevisionId::new();
        RevisionScope {
            deployment_id,
            bundle_id,
            revision_id,
        }
    }

    /// A request matching this revision's declared provider route is
    /// classified as `ProviderRoute`.
    #[test]
    fn admit_refuses_declared_provider_route() {
        let scope = test_scope();
        let table = provider_route_table(&scope);
        // A POST to the declared provider webhook path is refused (deferred),
        // never run as a generic activity that would skip signature verification.
        let verdict = admit_request(&table, &scope, "/slack/events", &hyper::Method::POST);
        assert_eq!(verdict, Admission::ProviderRoute);
    }

    /// A GET on a path that is not a provider route is `MethodNotAllowed`.
    #[test]
    fn admit_rejects_non_post_on_generic_path() {
        let scope = test_scope();
        let table = provider_route_table(&scope);
        // A browser GET that doesn't hit a provider route must not run the flow.
        let verdict = admit_request(&table, &scope, "/favicon.ico", &hyper::Method::GET);
        assert_eq!(verdict, Admission::MethodNotAllowed);
    }

    /// A POST on a path outside the provider routes is admitted as `Serve`.
    #[test]
    fn admit_serves_generic_post() {
        let scope = test_scope();
        let table = provider_route_table(&scope);
        let verdict = admit_request(&table, &scope, "/api/chat", &hyper::Method::POST);
        assert_eq!(verdict, Admission::Serve);
    }

    /// Provider routes are matched per scope: another revision's route on the
    /// same path does not apply.
    #[test]
    fn admit_does_not_match_provider_route_of_a_different_revision() {
        let owner = test_scope();
        let table = provider_route_table(&owner);
        // Same path, but a different revision's scope: not this revision's
        // provider route, so a POST falls through to generic serving.
        let stranger = test_scope();
        let verdict = admit_request(&table, &stranger, "/slack/events", &hyper::Method::POST);
        assert_eq!(verdict, Admission::Serve);
    }

    /// Build a canonicalized messaging `IngressEnvelope` for tenant `acme`.
    /// The session hint, channel, conversation, activity id and reply scope
    /// are all derived from `user` and `conversation`.
    fn envelope_for(user: &str, conversation: &str) -> IngressEnvelope {
        let session_hint = format!("acme:provider:{conversation}:{user}");
        let activity_id = format!("activity-{conversation}");
        let reply_scope = ReplyScope {
            conversation: conversation.into(),
            thread: None,
            reply_to: None,
            correlation: None,
        };

        IngressEnvelope {
            tenant: "acme".into(),
            env: Some("local".into()),
            pack_id: Some("pack.demo".into()),
            flow_id: "flow.main".into(),
            flow_type: Some("messaging".into()),
            action: Some("messaging".into()),
            session_hint: Some(session_hint),
            provider: Some("provider".into()),
            messaging_endpoint_id: None,
            channel: Some(conversation.into()),
            conversation: Some(conversation.into()),
            user: Some(user.into()),
            activity_id: Some(activity_id),
            timestamp: None,
            payload: json!({ "text": "hi" }),
            metadata: None,
            reply_scope: Some(reply_scope),
        }
        .canonicalize()
    }

    /// Build a `FlowWait` for `pack.demo` / `flow.main` suspended with reason
    /// `await-user`, set to continue at `next_node`, with a minimal
    /// execution state.
    fn wait_for(next_node: &str) -> FlowWait {
        let raw_state = json!({
            "input": { "text": "hi" },
            "nodes": {},
            "egress": []
        });
        let state: ExecutionState = serde_json::from_value(raw_state).expect("state");

        let snapshot = FlowSnapshot {
            pack_id: "pack.demo".into(),
            flow_id: "flow.main".into(),
            next_flow: None,
            next_node: next_node.into(),
            state,
        };
        FlowWait {
            reason: Some("await-user".into()),
            snapshot,
        }
    }

    /// Guards the cross-revision contamination fix. Two revisions of one pack
    /// serving the SAME tenant/user/conversation must never see each other's
    /// suspended `wait` snapshots. `revision_boot` gives each revision its own
    /// session store; this test models that with two `FlowResumeStore`s over
    /// separate session backends and checks that a snapshot saved through one
    /// cannot be fetched through the other with the identical envelope.
    #[test]
    fn isolated_revision_stores_do_not_cross_resume() {
        let revision_a = FlowResumeStore::new(new_session_store());
        let revision_b = FlowResumeStore::new(new_session_store());

        // Identical resume key (same tenant/user/conversation) across revisions.
        let resume_key = envelope_for("user-1", "conv-1");
        let suspended = wait_for("node-a");
        revision_a.save(&resume_key, &suspended).expect("save A");

        // Revision B, with its own store, sees nothing for the same envelope.
        let seen_by_b = revision_b.fetch(&resume_key).expect("fetch B");
        assert!(
            seen_by_b.is_none(),
            "revision B must not observe revision A's suspended snapshot"
        );

        // Revision A still resumes its own snapshot at the right node.
        let seen_by_a = revision_a.fetch(&resume_key).expect("fetch A");
        let resumed = seen_by_a.expect("A snapshot present");
        assert_eq!(resumed.next_node, "node-a");

        revision_a.clear(&resume_key).expect("clear A");
    }

    /// Negative control. With ONE session store behind both resume stores (the
    /// pre-fix arrangement), a snapshot saved through revision A's store IS
    /// returned to revision B for the same envelope — so B would resume against
    /// a potentially different flow graph. This is the contamination that
    /// `revision_boot`'s per-revision stores exist to prevent.
    #[test]
    fn shared_revision_store_leaks_across_revisions() {
        let backend = new_session_store();
        let revision_a = FlowResumeStore::new(Arc::clone(&backend));
        let revision_b = FlowResumeStore::new(backend);

        let resume_key = envelope_for("user-1", "conv-1");
        let suspended = wait_for("node-a");
        revision_a.save(&resume_key, &suspended).expect("save A");

        let seen_by_b = revision_b.fetch(&resume_key).expect("fetch B");
        let leaked = seen_by_b.expect("shared store leaks the snapshot to revision B");
        assert_eq!(
            leaked.next_node, "node-a",
            "shared store hands revision A's snapshot to revision B (the bug)"
        );

        revision_a.clear(&resume_key).expect("clear");
    }

    // --- N1.2: listen-address resolution fixtures --------------------------
    //
    // The listen-address tests mutate process env-vars; they serialize via
    // `test_env_lock` so they don't race the other listen-addr/env tests in
    // the crate. The fixtures they share are defined here.

    /// Host config for env `local` with no region or tenant org, carrying
    /// `addr` as its persisted listen address.
    fn host_cfg_with(addr: Option<SocketAddr>) -> EnvironmentHostConfig {
        let env_id = greentic_types::EnvId::new("local").unwrap();
        EnvironmentHostConfig {
            env_id,
            region: None,
            tenant_org_id: None,
            listen_addr: addr,
        }
    }

    /// Snapshot of the two listen-address env-vars, taken by
    /// `EnvVarGuard::clean` and written back by the `Drop` impl so a test
    /// leaves the process environment as it found it.
    struct EnvVarGuard {
        /// `GREENTIC_GATEWAY_LISTEN_ADDR` as it was before the test (`None` if unset).
        gateway_prev: Option<std::ffi::OsString>,
        /// `PORT` as it was before the test (`None` if unset).
        port_prev: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Record the current values of `GREENTIC_GATEWAY_LISTEN_ADDR` and
        /// `PORT`, then remove both so the test starts from a clean slate.
        fn clean() -> Self {
            // Capture first: the values must be read before they are removed.
            let snapshot = Self {
                gateway_prev: std::env::var_os("GREENTIC_GATEWAY_LISTEN_ADDR"),
                port_prev: std::env::var_os("PORT"),
            };
            // SAFETY: callers hold `test_env_lock` so env mutation is serialized.
            unsafe {
                std::env::remove_var("GREENTIC_GATEWAY_LISTEN_ADDR");
                std::env::remove_var("PORT");
            }
            snapshot
        }
    }

    impl Drop for EnvVarGuard {
        /// Put both env-vars back to what they were when the guard was made:
        /// re-set a recorded value, or remove the variable if it was unset.
        fn drop(&mut self) {
            let recorded = [
                ("GREENTIC_GATEWAY_LISTEN_ADDR", self.gateway_prev.as_ref()),
                ("PORT", self.port_prev.as_ref()),
            ];
            for (name, previous) in recorded {
                // SAFETY: callers hold `test_env_lock` so env mutation is serialized.
                unsafe {
                    match previous {
                        Some(value) => std::env::set_var(name, value),
                        None => std::env::remove_var(name),
                    }
                }
            }
        }
    }

    // --- N1.2: probe surface ---------------------------------------------

    /// Assemble an [`Activation`] from `host` and `dispatcher`, filling the
    /// remaining routing fields with empty values (no HTTP route descriptors,
    /// default deployment route table). The other activation fixtures build
    /// on this so the assembly lives in one place.
    fn activation_for_test(
        host: std::sync::Arc<greentic_runner_host::RunnerHost>,
        dispatcher: crate::revision_dispatcher::RevisionDispatcher,
    ) -> Activation {
        let routing = RevisionIngressRouting {
            dispatcher: std::sync::Arc::new(dispatcher),
            http_routes: HttpRouteTable::from_descriptors(Vec::new()),
            deployment_routes: crate::deployment_routes::DeploymentRouteTable::default(),
        };
        Activation {
            host,
            routing: std::sync::Arc::new(routing),
        }
    }

    /// An [`Activation`] for `env_id` with a placeholder host (no packs, no
    /// env passthrough) and a dispatcher that has had no traffic split applied.
    fn empty_activation(env_id: &str) -> Activation {
        use crate::revision_dispatcher::{RevisionDispatcher, RevisionDispatcherConfig};

        let bindings = greentic_runner_host::TenantBindings {
            tenant: env_id.to_string(),
            packs: Vec::new(),
            env_passthrough: Vec::new(),
        };
        let placeholder = greentic_runner_host::HostBuilder::new()
            .with_config(greentic_runner_host::HostConfig::from_gtbind(bindings))
            .build()
            .expect("build placeholder host");
        let host = std::sync::Arc::new(placeholder);

        let dispatcher = RevisionDispatcher::new(RevisionDispatcherConfig::new(env_id, [0u8; 32]));
        activation_for_test(host, dispatcher)
    }

    /// A `ServeState` whose slot holds an empty activation for `env_id` and
    /// whose recorded bound address is `bound`.
    fn empty_state(env_id: &str, bound: SocketAddr) -> ServeState {
        let initial = std::sync::Arc::new(empty_activation(env_id));
        ServeState {
            slot: ArcSwap::new(initial),
            bound_addr: bound,
        }
    }

    /// Read a response body into a `String` (lossy UTF-8).
    fn body_string(resp: Response<Full<Bytes>>) -> String {
        // `Full<Bytes>` carries its single chunk; `BodyExt::collect` is async,
        // so a current-thread runtime drives the (immediate) future.
        let driver = tokio::runtime::Builder::new_current_thread()
            .build()
            .expect("current-thread runtime for test body collection");
        let collected = driver
            .block_on(resp.into_body().collect())
            .expect("collect Full body");
        let raw = collected.to_bytes();
        String::from_utf8_lossy(&raw).into_owned()
    }

    /// Each of the four probe aliases answers `200` with body `ok`.
    #[test]
    fn try_probe_response_returns_ok_for_each_probe_alias() {
        let state = empty_state("local", "127.0.0.1:8080".parse().unwrap());
        for path in ["/livez", "/readyz", "/healthz", "/health"] {
            let Some(resp) = try_probe_response(path, &state) else {
                panic!("expected probe response for {path}");
            };
            assert_eq!(resp.status(), StatusCode::OK, "{path} status");
            assert_eq!(body_string(resp), "ok", "{path} body");
        }
    }

    /// `/status` on an empty runtime reports the schema tag, env id and bound
    /// address, with every count at zero.
    #[test]
    fn try_probe_response_status_reports_empty_runtime_diagnostics() {
        // N1.2: with no bundles attached, `/status` returns the same JSON
        // shape, with `bundles_active`/`deployments_routed`/`revisions_active`
        // all zero. Operators read this to confirm the listener is up but no
        // traffic is being served.
        let state = empty_state("prod-eu", "0.0.0.0:9090".parse().unwrap());
        let resp = try_probe_response("/status", &state).expect("status response");
        assert_eq!(resp.status(), StatusCode::OK);

        let text = body_string(resp);
        let body: serde_json::Value = serde_json::from_str(&text).unwrap();
        assert_eq!(body["schema"], "greentic.status.v1");
        assert_eq!(body["env_id"], "prod-eu");
        assert_eq!(body["listen_addr"], "0.0.0.0:9090");
        for counter in ["bundles_active", "deployments_routed", "revisions_active"] {
            assert_eq!(body[counter], 0);
        }
    }

    /// Paths that are not exact probe names produce no probe response.
    #[test]
    fn try_probe_response_returns_none_for_non_probe_paths() {
        let state = empty_state("local", "127.0.0.1:8080".parse().unwrap());
        // Real traffic paths must fall through to the routing pipeline.
        for traffic_path in ["/api/chat", "/livez/sub", "/"] {
            assert!(try_probe_response(traffic_path, &state).is_none());
        }
    }

    // --- N1.2: listen-address resolution ----------------------------------

    /// No host config and no env-vars: the spec default is returned.
    #[test]
    fn resolve_bind_addr_falls_back_to_spec_default_when_nothing_is_set() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let resolved = resolve_bind_addr(None);
        assert_eq!(resolved, DEFAULT_LISTEN_ADDR);
    }

    /// With no env-vars set, the persisted listen address is used as-is.
    #[test]
    fn resolve_bind_addr_uses_host_config_when_set() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let persisted: SocketAddr = "192.168.1.10:9000".parse().unwrap();
        let host = host_cfg_with(Some(persisted));
        let resolved = resolve_bind_addr(Some(&host));
        assert_eq!(resolved, persisted);
    }

    /// A full `ip:port` in the gateway env-var replaces the persisted address.
    #[test]
    fn resolve_bind_addr_gateway_env_full_socketaddr_overrides_host_config() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let host = host_cfg_with(Some("192.168.1.10:9000".parse().unwrap()));
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe { std::env::set_var("GREENTIC_GATEWAY_LISTEN_ADDR", "0.0.0.0:7000") };

        let expected: SocketAddr = "0.0.0.0:7000".parse().unwrap();
        assert_eq!(resolve_bind_addr(Some(&host)), expected);
    }

    /// A bare IP in the gateway env-var swaps the IP but keeps the persisted port.
    #[test]
    fn resolve_bind_addr_gateway_env_bare_ip_keeps_port_from_host_config() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let host = host_cfg_with(Some("127.0.0.1:9090".parse().unwrap()));
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe { std::env::set_var("GREENTIC_GATEWAY_LISTEN_ADDR", "0.0.0.0") };

        // Port carried over from host_config (9090), IP from env-var.
        let expected: SocketAddr = "0.0.0.0:9090".parse().unwrap();
        assert_eq!(resolve_bind_addr(Some(&host)), expected);
    }

    /// `PORT` changes the port and leaves the persisted IP alone.
    #[test]
    fn resolve_bind_addr_port_env_overrides_only_the_port() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let host = host_cfg_with(Some("192.168.1.10:9000".parse().unwrap()));
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe { std::env::set_var("PORT", "5555") };

        let expected: SocketAddr = "192.168.1.10:5555".parse().unwrap();
        assert_eq!(resolve_bind_addr(Some(&host)), expected);
    }

    /// With both env-vars set, the gateway var supplies the IP and `PORT`
    /// supplies the port.
    #[test]
    fn resolve_bind_addr_port_env_layers_on_top_of_gateway_env() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe {
            std::env::set_var("GREENTIC_GATEWAY_LISTEN_ADDR", "10.0.0.5:8000");
            std::env::set_var("PORT", "9999");
        }

        // PORT layers AFTER the GATEWAY env-var: same IP, PORT's port wins.
        let expected: SocketAddr = "10.0.0.5:9999".parse().unwrap();
        assert_eq!(resolve_bind_addr(None), expected);
    }

    /// An unparseable gateway env-var leaves the persisted address in force.
    #[test]
    fn resolve_bind_addr_invalid_gateway_env_falls_through() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let host = host_cfg_with(Some("127.0.0.1:9090".parse().unwrap()));
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe { std::env::set_var("GREENTIC_GATEWAY_LISTEN_ADDR", "not-an-address") };

        // Invalid env-var is ignored; persisted host_config wins.
        let expected: SocketAddr = "127.0.0.1:9090".parse().unwrap();
        assert_eq!(resolve_bind_addr(Some(&host)), expected);
    }

    /// An unparseable `PORT` leaves the persisted port in force.
    #[test]
    fn resolve_bind_addr_invalid_port_env_falls_through() {
        let _env_lock = crate::test_env_lock()
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env_guard = EnvVarGuard::clean();

        let host = host_cfg_with(Some("127.0.0.1:9090".parse().unwrap()));
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe { std::env::set_var("PORT", "not-a-number") };

        let expected: SocketAddr = "127.0.0.1:9090".parse().unwrap();
        assert_eq!(resolve_bind_addr(Some(&host)), expected);
    }

    // --- N2.1: reload + overlap-window drop --------------------------------

    /// Build an [`Activation`] with `revision_count` revisions under a single
    /// deployment, suitable for asserting reload counts change after swap.
    ///
    /// The 10 000 bps of traffic weight are split as evenly as integers allow:
    /// every revision gets `10_000 / revision_count`, and the first
    /// `10_000 % revision_count` revisions get one extra basis point, so the
    /// weights sum to exactly 10 000.
    ///
    /// # Panics
    ///
    /// Panics if `revision_count` is zero (there is nothing to split the
    /// traffic across), or if `apply_traffic_split` rejects the split.
    fn populated_activation(env_id: &str, revision_count: u32) -> Activation {
        use crate::revision_dispatcher::{
            RevisionDispatcher, RevisionDispatcherConfig, RevisionEntry,
        };
        use greentic_deploy_spec::ids::{BundleId, DeploymentId, RevisionId};

        // State the precondition up front rather than failing below with a
        // bare divide-by-zero panic.
        assert!(
            revision_count > 0,
            "populated_activation needs at least one revision to split traffic across"
        );

        // Only the placeholder host is reused from the empty activation.
        let base = empty_activation(env_id);
        let dispatcher = RevisionDispatcher::new(RevisionDispatcherConfig::new(env_id, [0u8; 32]));
        let deployment_id = DeploymentId::new();
        let bundle_id = BundleId::new("customer.support");

        let total: u32 = 10_000;
        let per_revision = total / revision_count;
        // Number of leading revisions that absorb the indivisible remainder.
        let extra = total % revision_count;
        let revisions: Vec<RevisionEntry> = (0..revision_count)
            .map(|index| RevisionEntry {
                revision_id: RevisionId::new(),
                bundle_id: bundle_id.clone(),
                weight_bps: per_revision + u32::from(index < extra),
            })
            .collect();

        dispatcher
            .apply_traffic_split(deployment_id, revisions, bundle_id, 0)
            .expect("apply_traffic_split for test activation");
        activation_for_test(base.host, dispatcher)
    }

    /// Build a [`RevisionServer`] around `state` with no listener thread and
    /// no shutdown channel, bound to the current Tokio runtime handle. This
    /// lets the reload tests run under `#[tokio::test]` without opening a
    /// real port.
    fn server_for_test(state: std::sync::Arc<ServeState>) -> RevisionServer {
        // Mirror `start()`: seed the watermark from the initial activation
        // so reload() tests behave the same way the production cold-start
        // path does.
        let watermark = {
            let mut seeded: HashMap<DeploymentId, u64> = HashMap::new();
            state
                .slot
                .load()
                .routing
                .dispatcher
                .absorb_into_watermark(&mut seeded);
            seeded
        };

        RevisionServer {
            shutdown: None,
            handle: None,
            actual_port: 0,
            state,
            runtime_handle: Handle::current(),
            reload_lock: std::sync::Mutex::new(()),
            generation_watermark: std::sync::Mutex::new(watermark),
        }
    }

    #[tokio::test]
    async fn reload_swaps_activation_visible_to_next_counts() {
        let bound: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let state = std::sync::Arc::new(empty_state("env-1", bound));
        let server = server_for_test(state);
        assert_eq!(server.counts(), (0, 0));

        let report = server.reload(populated_activation("env-1", 2), Duration::ZERO);
        assert_eq!(report.prev_deployments, 0);
        assert_eq!(report.prev_revisions, 0);
        assert_eq!(report.new_deployments, 1);
        assert_eq!(report.new_revisions, 2);

        // The next reader sees the new activation; counts come from the same
        // dispatcher `/status` reads.
        assert_eq!(server.counts(), (1, 2));
    }

    #[tokio::test]
    async fn reload_inflight_arc_outlives_swap() {
        let bound: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let state = std::sync::Arc::new(empty_state("env-1", bound));
        let server = server_for_test(std::sync::Arc::clone(&state));

        // Snapshot the activation the way a request handler does at the top
        // of `serve`. After the swap this Arc must still be live: a request
        // mid-flight cannot tear (dispatch on new, execute on old).
        let inflight = state.current();
        let inflight_ptr = std::sync::Arc::as_ptr(&inflight) as usize;

        server.reload(populated_activation("env-1", 1), Duration::from_secs(60));

        // The swap is visible to the next reader, but the previously
        // snapshotted Arc still points at the old activation.
        let post_swap = state.current();
        assert_ne!(
            std::sync::Arc::as_ptr(&post_swap) as usize,
            inflight_ptr,
            "post-swap snapshot must point at the new activation"
        );
        // Old activation still serves zero revisions; new serves one.
        let (old_deps, old_revs) = inflight.routing.dispatcher.counts();
        assert_eq!((old_deps, old_revs), (0, 0));
        let (new_deps, new_revs) = post_swap.routing.dispatcher.counts();
        assert_eq!((new_deps, new_revs), (1, 1));
    }

    /// The pre-swap activation must stay alive while the drain window is
    /// running and must be released once that window has elapsed.
    #[tokio::test]
    async fn reload_drops_old_activation_after_drain_window() {
        use std::sync::Arc;

        let drain_window = Duration::from_millis(50);
        let bound: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let state = Arc::new(empty_state("env-1", bound));
        let server = server_for_test(Arc::clone(&state));

        // Observe the pre-swap activation through a `Weak` only. The strong
        // handle used to create it is released at the end of this scope, so
        // the test itself never keeps the old activation alive.
        let weak_old = {
            let pre_swap = state.current();
            Arc::downgrade(&pre_swap)
        };

        server.reload(populated_activation("env-1", 1), drain_window);

        // Still inside the drain window: something other than this test
        // must be holding a strong reference to the old activation.
        assert!(
            weak_old.upgrade().is_some(),
            "old activation must outlive the drain window"
        );

        // Sleep comfortably past the window, then expect every strong
        // reference to be gone so that `upgrade()` yields `None`.
        tokio::time::sleep(drain_window + Duration::from_millis(200)).await;
        assert!(
            weak_old.upgrade().is_none(),
            "old activation must be freed once the drain window elapses"
        );
    }

    /// Test fixture: an [`Activation`] whose dispatcher serves exactly one
    /// deployment with one revision carrying the full 10 000 bps weight.
    ///
    /// All three identifiers are supplied by the caller so that two
    /// activations built by this helper can share IDs across a reload. The
    /// split is applied with an expected generation of `0`; the tests in this
    /// module treat the resulting deployment generation as `1`.
    fn activation_with_ids(
        env_id: &str,
        deployment_id: greentic_deploy_spec::ids::DeploymentId,
        revision_id: greentic_deploy_spec::ids::RevisionId,
        bundle_id: greentic_deploy_spec::ids::BundleId,
    ) -> Activation {
        use crate::revision_dispatcher::{
            RevisionDispatcher, RevisionDispatcherConfig, RevisionEntry,
        };

        // Only the `host` half of the empty activation is reused below.
        let template = empty_activation(env_id);

        let config = RevisionDispatcherConfig::new(env_id, [0u8; 32]);
        let dispatcher = RevisionDispatcher::new(config);
        let sole_revision = RevisionEntry {
            revision_id,
            bundle_id: bundle_id.clone(),
            weight_bps: 10_000,
        };
        dispatcher
            .apply_traffic_split(deployment_id, vec![sole_revision], bundle_id, 0)
            .expect("apply_traffic_split for shared-deployment activation");

        activation_for_test(template.host, dispatcher)
    }

    /// Regression test (Codex finding on PR-N2.1): a cookie minted before a
    /// reload must stop verifying afterwards, even when the reloaded
    /// activation carries the very same deployment, bundle and revision.
    #[tokio::test]
    async fn reload_invalidates_pre_reload_cookie_for_persisted_deployment() {
        use std::sync::Arc;

        // Without the generation bump in `reload()`, a fresh dispatcher built
        // from the same runtime-config would start at the same from-zero
        // generation (1), so a pre-reload cookie would keep verifying. That
        // would defeat canary weight cuts and partial rollbacks for clients
        // that already hold a cookie.
        let env_id = "env-1";
        let tenant = "tenant-a";
        let dep_id = greentic_deploy_spec::ids::DeploymentId::new();
        let rev_id = greentic_deploy_spec::ids::RevisionId::new();
        let bundle_id = greentic_deploy_spec::ids::BundleId::new("customer.support");
        // Expiry far enough out that it never influences verification here.
        let far_future = 9_999_999_999;

        let initial = activation_with_ids(env_id, dep_id, rev_id, bundle_id.clone());
        let bound: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let state = Arc::new(ServeState {
            slot: ArcSwap::new(Arc::new(initial)),
            bound_addr: bound,
        });
        let server = server_for_test(Arc::clone(&state));

        // --- Before the reload -------------------------------------------
        let before = state.current();
        let old = &before.routing.dispatcher;
        assert_eq!(
            old.counts(),
            (1, 1),
            "pre-reload activation must hold the test deployment + revision"
        );

        // Seal a cookie at generation 1, the value a from-zero
        // `apply_traffic_split` call leaves behind.
        let cookie = old.seal_cookie(
            env_id,
            tenant,
            dep_id,
            rev_id,
            /* generation */ 1,
            /* expires_at */ far_future,
        );
        // Sanity: the dispatcher that minted the cookie accepts it.
        assert_eq!(
            old.verify_cookie(&cookie, env_id, tenant, dep_id, 1, 0),
            Some(rev_id),
            "pre-reload dispatcher must verify its own cookie"
        );

        // --- Reload with identical IDs -----------------------------------
        // Only the dispatcher object is new. The carry-forward has to bump
        // its generation so the generation-1 cookie stops verifying.
        let replacement = activation_with_ids(env_id, dep_id, rev_id, bundle_id);
        server.reload(replacement, Duration::ZERO);

        let after = state.current();
        let new = &after.routing.dispatcher;

        // The cookie still says generation 1; the live generation is now 2
        // (1 + 1 from the watermark bump), so verification must fail.
        assert_eq!(
            new.verify_cookie(&cookie, env_id, tenant, dep_id, 2, 0),
            None,
            "post-reload dispatcher must reject the pre-reload cookie"
        );

        // A cookie sealed at generation 2 does verify, which pins the
        // carry-forward at exactly 2.
        let post_cookie = new.seal_cookie(env_id, tenant, dep_id, rev_id, 2, far_future);
        assert_eq!(
            new.verify_cookie(&post_cookie, env_id, tenant, dep_id, 2, 0),
            Some(rev_id),
            "post-reload dispatcher must verify a cookie minted at the new generation"
        );
    }

    /// Regression test (Codex finding): a deployment that is removed and then
    /// re-added must come back at a strictly newer generation, so cookies
    /// sealed before the removal cannot verify against it.
    #[tokio::test]
    async fn reload_invalidates_cookie_after_remove_and_readd_within_ttl() {
        use std::sync::Arc;

        // A bump driven only by the previous dispatcher would miss
        // deployments that had already been dropped from runtime-config:
        // removing one and re-adding it before the cookie/pin TTL elapsed
        // produced a fresh dispatcher at the same default generation, which
        // happily verified cookies signed against the original activation.
        // The server-level watermark is expected to tombstone removed
        // deployments so the re-added one outranks anything a client holds.
        let env_id = "env-1";
        let tenant = "tenant-a";
        let dep_id = greentic_deploy_spec::ids::DeploymentId::new();
        let rev_id = greentic_deploy_spec::ids::RevisionId::new();
        let bundle_id = greentic_deploy_spec::ids::BundleId::new("customer.support");
        // Expiry far enough out that it never influences verification here.
        let far_future = 9_999_999_999;

        let initial = activation_with_ids(env_id, dep_id, rev_id, bundle_id.clone());
        let bound: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let state = Arc::new(ServeState {
            slot: ArcSwap::new(Arc::new(initial)),
            bound_addr: bound,
        });
        let server = server_for_test(Arc::clone(&state));

        // Step 1: sign a cookie at the initial activation's generation
        // (1, from `apply_traffic_split(.., 0)`).
        let before = state.current();
        let cookie = before.routing.dispatcher.seal_cookie(
            env_id,
            tenant,
            dep_id,
            rev_id,
            /* generation */ 1,
            /* expires_at */ far_future,
        );

        // Step 2: reload to an activation without the deployment at all
        // (the operator ran `gtc op bundles remove`, or set traffic to 0 on
        // every revision). The watermark has to remember `dep_id` at
        // generation 1 even though no live dispatcher carries it any more.
        let without_deployment = empty_activation(env_id);
        server.reload(without_deployment, Duration::ZERO);

        // Step 3: reload again, re-adding the same deployment + revision
        // (rollback / re-stage). Left alone, the fresh dispatcher would sit
        // at generation 1 again; the watermark must push it to 2.
        let readded = activation_with_ids(env_id, dep_id, rev_id, bundle_id);
        server.reload(readded, Duration::ZERO);

        let after = state.current();
        let live = &after.routing.dispatcher;
        assert_eq!(
            live.counts(),
            (1, 1),
            "re-added deployment must be present in the post-reload dispatcher"
        );

        // `dispatch()` hands the live generation to verification as
        // `expected_generation`. Mirroring that with 2, the generation-1
        // cookie must be rejected.
        assert_eq!(
            live.verify_cookie(&cookie, env_id, tenant, dep_id, 2, 0),
            None,
            "cookie sealed before remove must NOT verify under the bumped generation"
        );

        // Guard against over-bumping: the generation recorded for the first
        // activation was 1, and the re-added deployment's fresh generation 1
        // was lifted exactly one step above it. A cookie sealed at 2 must
        // therefore verify.
        let post_cookie = live.seal_cookie(env_id, tenant, dep_id, rev_id, 2, far_future);
        assert_eq!(
            live.verify_cookie(&post_cookie, env_id, tenant, dep_id, 2, 0),
            Some(rev_id),
            "cookie minted at the bumped generation (2) must verify"
        );
    }

    // --- N2.3: revision drain on removal -----------------------------------

    /// Test fixture: an [`Activation`] serving one deployment split evenly
    /// (5 000 bps each) between `rev_a` and `rev_b`.
    ///
    /// The deployment route table is seeded with a single
    /// `(deployment_id → tenant)` entry so that `spawn_revision_drains` can
    /// find the tenant binding. The dispatcher is returned alongside the
    /// activation as a shared handle, letting callers keep watching the OLD
    /// dispatcher after a reload and observe drain transitions on it.
    fn activation_with_two_revisions(
        env_id: &str,
        tenant: &str,
        deployment_id: DeploymentId,
        rev_a: RevisionId,
        rev_b: RevisionId,
        bundle_id: BundleId,
    ) -> (Activation, std::sync::Arc<RevisionDispatcher>) {
        use crate::revision_dispatcher::{RevisionDispatcherConfig, RevisionEntry};
        use std::sync::Arc;

        // Only the `host` half of the empty activation is reused below.
        let base = empty_activation(env_id);

        // Both revisions belong to the same bundle and get half the traffic.
        let half_weight = |revision_id: RevisionId| RevisionEntry {
            revision_id,
            bundle_id: bundle_id.clone(),
            weight_bps: 5_000,
        };

        let dispatcher = RevisionDispatcher::new(RevisionDispatcherConfig::new(env_id, [0u8; 32]));
        let revisions = vec![half_weight(rev_a), half_weight(rev_b)];
        dispatcher
            .apply_traffic_split(deployment_id, revisions, bundle_id, 0)
            .expect("apply_traffic_split");

        // Share the dispatcher between the activation and the caller.
        let dispatcher = Arc::new(dispatcher);
        let tenant_binding = (deployment_id, tenant.to_string(), Vec::new(), Vec::new());
        let routing = Arc::new(RevisionIngressRouting {
            dispatcher: Arc::clone(&dispatcher),
            http_routes: HttpRouteTable::from_descriptors(Vec::new()),
            deployment_routes: crate::deployment_routes::DeploymentRouteTable::from_parts(vec![
                tenant_binding,
            ]),
        });

        (
            Activation {
                host: base.host,
                routing,
            },
            dispatcher,
        )
    }

    /// When a reload drops one of two revisions of a deployment, the dropped
    /// revision must first be marked draining on the OLD dispatcher and then
    /// evicted from it after the drain window. The surviving revision must be
    /// left alone throughout.
    #[tokio::test]
    async fn reload_drain_marks_then_evicts_removed_revision() {
        use std::collections::HashSet;
        use std::sync::Arc;

        let env_id = "env-1";
        let tenant = "tenant-a";
        let dep_id = DeploymentId::new();
        let rev_kept = RevisionId::new();
        let rev_removed = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        // OLD: both revisions. `old_dispatcher` stays usable after the swap.
        let (old_activation, old_dispatcher) = activation_with_two_revisions(
            env_id,
            tenant,
            dep_id,
            rev_kept,
            rev_removed,
            bundle_id.clone(),
        );
        let state = serve_state_with(old_activation);
        let server = server_for_test(Arc::clone(&state));

        // NEW: only `rev_kept`, carrying the full weight.
        let new_activation = activation_with_ids(env_id, dep_id, rev_kept, bundle_id);

        // One second is the shortest window that survives the
        // `drain_window.as_secs()` conversion into drain_seconds, and it
        // leaves the coordinator room to mark, sleep and evict.
        server.reload(new_activation, Duration::from_secs(1));

        // The drain task is spawned by the time `reload` returns but may not
        // have marked anything yet, so give it a moment to run.
        tokio::time::sleep(Duration::from_millis(50)).await;
        assert!(
            old_dispatcher.is_draining(dep_id, rev_removed),
            "removed revision must be marked draining on OLD dispatcher"
        );
        assert!(
            !old_dispatcher.is_draining(dep_id, rev_kept),
            "kept revision must NOT be marked draining"
        );

        // Sleep past the drain window, then inspect which revisions of this
        // deployment the OLD dispatcher still knows about.
        tokio::time::sleep(Duration::from_millis(1_200)).await;
        let remaining: HashSet<_> = old_dispatcher
            .revision_keys()
            .into_iter()
            .filter(|(deployment, _, _)| *deployment == dep_id)
            .map(|(_, _, revision)| revision)
            .collect();
        assert!(
            !remaining.contains(&rev_removed),
            "removed revision must be evicted from OLD dispatcher after drain"
        );
        assert!(
            remaining.contains(&rev_kept),
            "kept revision must remain on OLD dispatcher"
        );
    }

    /// A reload that keeps the revision set unchanged must not mark anything
    /// draining on the OLD dispatcher.
    #[tokio::test]
    async fn reload_does_not_drain_when_no_revisions_removed() {
        // Adding a brand new deployment or reweighting the same revisions
        // does not constitute a removal; here the OLD and NEW activations
        // carry the identical deployment and revision pair.
        let env_id = "env-1";
        let tenant = "tenant-a";
        let dep_id = DeploymentId::new();
        let rev_a = RevisionId::new();
        let rev_b = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        let (act_old, old_dispatcher) =
            activation_with_two_revisions(env_id, tenant, dep_id, rev_a, rev_b, bundle_id.clone());
        let state = serve_state_with(act_old);
        let server = server_for_test(std::sync::Arc::clone(&state));

        // NEW activation: same deployment, same two revisions (identical set).
        let (act_new, _) =
            activation_with_two_revisions(env_id, tenant, dep_id, rev_a, rev_b, bundle_id);
        server.reload(act_new, Duration::from_millis(100));

        // Give any erroneously-spawned drain task a chance to run.
        tokio::time::sleep(Duration::from_millis(50)).await;

        // Query the draining set exactly once: a background task could
        // change it between the check and the failure message, and the
        // diagnostic must show the same value the assertion tested.
        let draining = old_dispatcher.draining_revisions(dep_id);
        assert!(
            draining.is_empty(),
            "no revisions removed → nothing marked draining (got {:?})",
            draining
        );
    }

    /// With a zero drain window, `reload` must not start the revision drain
    /// path at all, even when the reload removes a revision.
    #[tokio::test]
    async fn reload_zero_drain_window_skips_drain_spawn() {
        // `drain_window == 0` is a test-only mode that drops the OLD
        // activation synchronously. The drain coordinator path MUST be
        // bypassed too — otherwise drain tasks would race the synchronous
        // drop against a dispatcher whose `Arc<RevisionDispatcher>` could
        // have been the last strong handle outside the coordinator.
        let env_id = "env-1";
        let tenant = "tenant-a";
        let dep_id = DeploymentId::new();
        let rev_a = RevisionId::new();
        let rev_b = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        let (act_old, old_dispatcher) =
            activation_with_two_revisions(env_id, tenant, dep_id, rev_a, rev_b, bundle_id.clone());
        let state = serve_state_with(act_old);
        let server = server_for_test(std::sync::Arc::clone(&state));

        // NEW activation drops `rev_b`, which would normally trigger a drain.
        let act_new = activation_with_ids(env_id, dep_id, rev_a, bundle_id);
        server.reload(act_new, Duration::ZERO);

        // Give any erroneously-spawned drain task a chance to run.
        tokio::time::sleep(Duration::from_millis(50)).await;

        // Query the draining set exactly once: a background task could
        // change it between the check and the failure message, and the
        // diagnostic must show the same value the assertion tested.
        let draining = old_dispatcher.draining_revisions(dep_id);
        assert!(
            draining.is_empty(),
            "drain_window == 0 must bypass drain spawn (got {:?})",
            draining
        );
    }

    // --- N2.3 Codex fix: stale-eviction suppression probe ------------------

    /// Wrap `activation` in a shared `ServeState` whose live slot holds it,
    /// bound to the placeholder address `127.0.0.1:0`. Used by the drain
    /// tests and the [`SlotLivenessProbe`] tests in this module.
    fn serve_state_with(activation: Activation) -> std::sync::Arc<ServeState> {
        use std::sync::Arc;

        // Loopback with port 0, built directly rather than parsed from text.
        let bound_addr = SocketAddr::from(([127, 0, 0, 1], 0));
        let slot = ArcSwap::new(Arc::new(activation));
        Arc::new(ServeState { slot, bound_addr })
    }

    /// If the live slot holds a different, newer activation that serves the
    /// revision, the probe must report the revision as live elsewhere so the
    /// OLD activation's drain suppresses its eviction event.
    #[test]
    fn liveness_probe_reports_live_when_revision_present_in_newer_activation() {
        use std::sync::Arc;

        let env_id = "env-1";
        let dep_id = DeploymentId::new();
        let rev_id = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        // Two separately built activations with identical IDs: each has its
        // own dispatcher `Arc`, and both serve `rev_id`.
        let superseded = activation_with_ids(env_id, dep_id, rev_id, bundle_id.clone());
        let newer = activation_with_ids(env_id, dep_id, rev_id, bundle_id);

        let probe = SlotLivenessProbe {
            state: serve_state_with(newer),
            draining_dispatcher: Arc::clone(&superseded.routing.dispatcher),
        };

        assert!(
            probe.is_live_elsewhere(dep_id, rev_id),
            "revision present in a newer activation must read as live elsewhere"
        );
    }

    /// Identity guard: when the live slot still points at the dispatcher that
    /// is being drained, the revision is not live in any newer activation and
    /// the eviction event should fire (direct-drain semantics). Models the
    /// future `gtc op revisions drain` path.
    #[test]
    fn liveness_probe_reports_not_live_when_live_slot_is_the_draining_dispatcher() {
        use std::sync::Arc;

        let env_id = "env-1";
        let dep_id = DeploymentId::new();
        let rev_id = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        // One activation only: the probe's draining dispatcher is the very
        // dispatcher installed in the live slot.
        let only_activation = activation_with_ids(env_id, dep_id, rev_id, bundle_id);
        let draining_dispatcher = Arc::clone(&only_activation.routing.dispatcher);
        let probe = SlotLivenessProbe {
            state: serve_state_with(only_activation),
            draining_dispatcher,
        };

        assert!(
            !probe.is_live_elsewhere(dep_id, rev_id),
            "draining the live dispatcher itself must NOT read as live elsewhere"
        );
    }

    /// If the live slot holds a newer activation that does not serve the
    /// revision (a genuine removal with no rollback), the probe must report
    /// it as not live elsewhere so the eviction event fires normally.
    #[test]
    fn liveness_probe_reports_not_live_when_revision_absent_from_live_slot() {
        use std::sync::Arc;

        let env_id = "env-1";
        let dep_id = DeploymentId::new();
        let rev_removed = RevisionId::new();
        let rev_other = RevisionId::new();
        let bundle_id = BundleId::new("customer.support");

        // Same deployment on both sides, but the newer activation serves a
        // DIFFERENT revision than the one being drained.
        let superseded = activation_with_ids(env_id, dep_id, rev_removed, bundle_id.clone());
        let newer = activation_with_ids(env_id, dep_id, rev_other, bundle_id);

        let probe = SlotLivenessProbe {
            state: serve_state_with(newer),
            draining_dispatcher: Arc::clone(&superseded.routing.dispatcher),
        };

        assert!(
            !probe.is_live_elsewhere(dep_id, rev_removed),
            "a genuinely removed revision must NOT read as live elsewhere"
        );
    }
}