ktstr 0.5.2

Test harness for Linux process schedulers
//! BPF cast analysis: recover typed pointers from `u64` fields.
//!
//! Schedulers store kernel pointers in BPF map values as raw `u64`
//! (because BTF cannot express a pointer to a per-allocation type),
//! then dereference them later as `struct Q *`. They also stash
//! kernel kptrs (`task_struct *`, `cgroup *`, …) in `u64` slots inside
//! map values that the verifier accepts as plain integers. From BTF
//! alone every such field looks like a counter, so the renderer's
//! native [`btf_rs::Type::Int`] arm has no way to recover the target
//! struct on its own. This module bridges that gap by analysing the
//! BPF program's instruction stream and recording the source / target
//! struct for every `u64` field that is observed to carry a typed
//! pointer. The renderer
//! ([`super::btf_render::render_cast_pointer`]) consumes the resulting
//! [`CastMap`] via [`super::btf_render::MemReader::cast_lookup`] and
//! chases the recovered pointer through the address-space-appropriate
//! reader (arena vs slab/vmalloc) — the same chase shape the
//! [`btf_rs::Type::Ptr`] arm uses for BTF-typed pointers, so
//! cast-recovered and natively-typed pointers render identically.
//!
//! The analysis is intentionally conservative. False negatives (a
//! cast we miss → renderer falls back to raw `u64`, which is the
//! status quo) are acceptable. False positives (a cast we mis-identify
//! → renderer chases garbage and emits structured nonsense) are not.
//!
//! # Algorithm
//!
//! Forward register-state walk over `&[BpfInsn]`. Each register
//! holds one of:
//! - `Unknown`
//! - `Pointer { struct_type_id }` — pointer to BTF struct
//!   `struct_type_id`. Both "address of a struct we will field-access
//!   through" and "kernel kptr being passed around" use this state;
//!   the distinction is the instruction (LDX vs STX) that consumes it.
//! - `LoadedU64Field { source_struct_id, field_offset }` — a 64-bit
//!   value loaded from struct `source_struct_id` at byte
//!   `field_offset`, where the BTF declares the source field as a
//!   plain `u64`. Used for the arena-pointer detection path: a u64
//!   field that is itself dereferenced.
//!
//! Two detection paths emit entries into the [`CastMap`]; a third
//! (`BPF_ADDR_SPACE_CAST` → arena_confirmed) emits no entries itself:
//! it feeds conflict detection and supplies the direct runtime
//! evidence that the F1 mitigation section below requires before an
//! Arena finding is emitted.
//!
//! 1. **Arena pointer (LDX-side).** On every `BPF_LDX | BPF_MEM`
//!    instruction the destination register is updated according to
//!    the base register's state and the BTF layout at `(struct, off)`.
//!    When the base is a `LoadedU64Field`, the (target_offset,
//!    target_size) access pattern is recorded. After the walk the
//!    recorded patterns are matched against every BTF struct: a
//!    pattern resolves to a unique target struct only if exactly one
//!    struct in the BTF satisfies all observed `(offset, size)` pairs
//!    for that source field. The source struct itself is dropped from
//!    the candidate set before uniqueness is checked — a self-typed
//!    cast (`source.f` → `source*`) would let a self-referential
//!    layout silently win the intersection without any disambiguating
//!    evidence. Tagged with `AddrSpace::Arena`.
//!
//! 2. **Kernel kptr (STX-side).** On every `BPF_STX | BPF_MEM` of
//!    width `BPF_DW` where the destination base is a `Pointer{P}` and
//!    the source register is a `Pointer{T}` AND the BTF declares the
//!    field of `P` at the store offset as a plain `u64`, the
//!    `(P, offset) → T` mapping is recorded directly. Tagged with
//!    `AddrSpace::Kernel`. No BTF-shape inference is needed — `T` is
//!    already known from how the source register became typed (entry
//!    parameter seeded from a FuncProto, propagated through MOV /
//!    stack spill / kfunc return). Self-stores (`P == T`) are
//!    rejected: a typed-pointer aliasing path that resolved
//!    parent and target to the same struct id is almost always the
//!    analyzer's flow-insensitive register tracking confusing two
//!    code paths, and recording a self-store would later resolve to
//!    a chase that loops on the same struct.
//!
//! Stack spill / reload is tracked through `[r10 + neg_off]`: STX
//! through r10 saves the source register's state, LDX through r10
//! restores it. This catches typed pointers that round-trip through
//! the stack across helper calls.
//!
//! Function entry seeding (via [`FuncEntry`]) reseeds R1..R5 with the
//! parameter types from a BTF FuncProto at each function entry PC.
//! The same mechanism handles cross-function jumps inside a single
//! `&[BpfInsn]` slab.
//!
//! Kfunc return values: at every `BPF_CALL` whose `src_reg` is
//! `BPF_PSEUDO_KFUNC_CALL`, `imm` is interpreted as a BTF id; if the
//! kfunc's FuncProto return type peels to `Ptr -> Struct`, R0 is set
//! to `Pointer{struct_type_id}` after the standard R0..R5 clobber.
//!
//! Plain-helper return values: at every `BPF_CALL` whose `src_reg ==
//! 0` (the helper-call form per linux uapi `bpf.h`) AND
//! `imm == BPF_FUNC_map_lookup_elem`, R1's pre-clobber state is
//! consulted. If R1 was [`RegState::DatasecPointer`] into a
//! `BTF_KIND_DATASEC` named `.maps` and the targeted map's BTF
//! declaration carries a `value` member whose type peels to
//! `Ptr -> Struct/Union`, R0 is typed `Pointer{value_struct_id}`
//! after the clobber. Other helper ids leave R0 Unknown — the
//! analyzer keeps a strict per-helper allowlist (currently length 1)
//! to bound false-positive risk. Maps whose value type is a primitive
//! (e.g. stat counters declared `__type(value, u64)`) drop because
//! `Ptr -> u64` does not peel to a Struct/Union.
//!
//! Branches are handled conservatively: at every PC the pre-pass
//! identifies as a jump target, register state AND stack-slot state
//! are reset before processing that PC. This drops casts that span
//! branch joins (a false negative, which is acceptable). Function
//! calls clobber `r0..=r5` per the BPF ABI; kfunc and helper return
//! typing happens after the clobber.
//!
//! # Public surface
//!
//! - [`analyze_casts`]: full forward-pass entry point.
//! - [`AddrSpace`]: tag distinguishing arena pointers from kernel
//!   kptrs in the output.
//! - [`CastMap`]: BTreeMap (deterministic iteration order) from
//!   `(source_btf_type_id, field_byte_offset)` to
//!   [`super::btf_render::CastHit`].
//! - [`InitialReg`]: caller-supplied seed register state for entry
//!   parameters / known typed values returned from helpers.
//! - [`FuncEntry`]: function-entry PC + BTF FuncProto id for
//!   automatic R1..R5 seeding from the proto's parameters.
//! - [`SubprogReturn`]: `BPF_PSEUDO_CALL` PC whose resolved subprog
//!   name matches the arena-allocator allowlist; seeds R0 to
//!   [`RegState::ArenaU64FromAlloc`] after the standard R0..=R5
//!   clobber so allocator-return values flow into the STX-flow
//!   arena cast detection path.
//! - [`DatasecPointer`]: caller-supplied annotation pairing a
//!   `BPF_LD_IMM64` PC with its target `BTF_KIND_DATASEC` plus the
//!   byte offset of the referenced global within that section, so
//!   the `BPF_LD_IMM64` arm can set the destination register to
//!   [`RegState::DatasecPointer`] and downstream STX/LDX through
//!   the register fire kptr / arena cast findings against the
//!   datasec's variable layout.
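//!
//! A minimal sketch of driving the public surface (the BTF handle,
//! instruction slice, and seed values below are hypothetical
//! placeholders, not a real scheduler program):
//!
//! ```ignore
//! // `insns` decoded from a .bpf.o program section (see
//! // `BpfInsn::from_le_bytes`); `btf` parsed from the same object.
//! // Seed R1 as a pointer to a hypothetical context struct, BTF id 17.
//! let seeds = [InitialReg { reg: 1, struct_type_id: 17 }];
//! let casts: CastMap = analyze_casts(&insns, &btf, &seeds, &[], &[], &[]);
//! for ((src_id, off), hit) in &casts {
//!     println!("struct {src_id} +{off}: {} pointer to BTF id {}",
//!              hit.addr_space, hit.target_type_id);
//! }
//! ```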
//!
//! # F1 mitigation: arena_confirmed evidence required
//!
//! On aarch64 the 4 GiB arena window catches any 33-bit value as
//! "in arena", so a slot that just happens to hold a 33-bit-shaped
//! counter could be mis-rendered as an arena pointer. Every Arena
//! cast emit therefore requires direct evidence the slot held an
//! arena VA at runtime: either (a) an observed
//! [`BPF_ADDR_SPACE_CAST`] (`ALU64 | MOV | X` with `off=1, imm=1`)
//! on a value loaded from the slot, or (b) an observed STX of an
//! [`RegState::ArenaU64FromAlloc`] value into the slot. Slots with
//! shape-inference evidence ALONE are dropped — the operator can
//! re-enable them by adding either form of direct evidence in the
//! scheduler source.
//!
//! The module does not mutate the BTF object and does not call into
//! libbpf or the kernel — it operates purely on the instruction slice
//! and the parsed BTF. Instructions are represented by [`BpfInsn`], a
//! native Rust struct that mirrors the kernel's on-wire 8-byte layout
//! (`include/uapi/linux/bpf.h struct bpf_insn`). Callers parse program
//! sections out of the raw `.bpf.o` ELF (e.g. via goblin) and feed the
//! resulting byte stream through [`BpfInsn::from_le_bytes`]; the
//! analyzer never invokes `bpf_object__prepare` or any other
//! kernel-side BPF interface. Opcode and register-encoding constants are
//! sourced from `libbpf_rs::libbpf_sys` (the bindgen translation of
//! `linux/include/uapi/linux/bpf.h`) so they track the upstream UAPI
//! without duplicating numeric literals here.

use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};

use btf_rs::{Btf, BtfType, Type};
use libbpf_rs::libbpf_sys as bs;

/// One BPF instruction in the kernel's on-wire encoding.
///
/// Mirrors `struct bpf_insn` from linux `include/uapi/linux/bpf.h`:
/// 8 bytes total, where the second byte packs `dst_reg` (low 4 bits)
/// and `src_reg` (high 4 bits). All multi-byte fields are
/// little-endian per the BPF wire format spec.
///
/// Pure host-side data — no kernel interaction, no FFI. Callers obtain
/// a slice of these from raw program bytes in a `.bpf.o` ELF section.
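///
/// A small decode sketch (the instruction is illustrative):
///
/// ```ignore
/// // 0x0f = BPF_ALU64 | BPF_ADD | BPF_X ("r1 += r2"); 0x21 packs
/// // dst_reg = 1 (low nibble) and src_reg = 2 (high nibble).
/// let insn = BpfInsn::from_le_bytes([0x0f, 0x21, 0, 0, 0, 0, 0, 0]);
/// assert_eq!(insn.dst_reg(), 1);
/// assert_eq!(insn.src_reg(), 2);
/// ```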
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BpfInsn {
    /// Opcode byte: class (low 3 bits) + size/op + mode (high bits).
    pub code: u8,
    /// Packed register byte: `dst_reg | (src_reg << 4)` — 4 bits each
    /// per the wire format. Use [`Self::dst_reg`] / [`Self::src_reg`]
    /// to read. Private so callers cannot bypass the 4-bit packing
    /// invariant; [`Self::new`] and [`Self::from_le_bytes`] are the
    /// only construction paths.
    regs: u8,
    /// Signed 16-bit offset (PC-relative for jumps, byte offset for
    /// mem ops, atomic-op subselect for `BPF_MODE_ATOMIC`).
    pub off: i16,
    /// Signed 32-bit immediate (constant operand, or — for
    /// `BPF_PSEUDO_KFUNC_CALL` — the BTF id of the kfunc).
    pub imm: i32,
}

impl BpfInsn {
    /// Construct an instruction with explicit fields. `dst` and `src`
    /// are 0..=15 register indices (the analyzer rejects 11..=15 at
    /// decode time per `step()`).
    ///
    /// Test-only: production decode uses
    /// [`BpfInsn::from_le_bytes`]; tests construct fixtures directly
    /// to exercise specific opcode/register combinations without a
    /// round-trip through the wire encoder. Gated `#[cfg(test)]` so
    /// release builds do not carry an unused constructor.
    #[cfg(test)]
    pub const fn new(code: u8, dst: u8, src: u8, off: i16, imm: i32) -> Self {
        Self {
            code,
            regs: (dst & 0x0f) | ((src & 0x0f) << 4),
            off,
            imm,
        }
    }

    /// Decode 8 bytes of little-endian wire data into a [`BpfInsn`].
    /// Caller is responsible for chunking program bytes into 8-byte
    /// slots — `BPF_LD_IMM64` consumes two consecutive slots and the
    /// analyzer's `skip_next` flag handles the second one.
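    ///
    /// A minimal chunking sketch (assumes `prog_bytes` holds one
    /// program section's raw bytes; the name is illustrative):
    ///
    /// ```ignore
    /// let insns: Vec<BpfInsn> = prog_bytes
    ///     .chunks_exact(8)
    ///     .map(|c| BpfInsn::from_le_bytes(c.try_into().unwrap()))
    ///     .collect();
    /// ```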
    pub fn from_le_bytes(buf: [u8; 8]) -> Self {
        let off = i16::from_le_bytes([buf[2], buf[3]]);
        let imm = i32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
        Self {
            code: buf[0],
            regs: buf[1],
            off,
            imm,
        }
    }

    /// Destination register index (low 4 bits of the packed byte).
    #[inline]
    pub const fn dst_reg(&self) -> u8 {
        self.regs & 0x0f
    }

    /// Source register index (high 4 bits of the packed byte).
    #[inline]
    pub const fn src_reg(&self) -> u8 {
        (self.regs >> 4) & 0x0f
    }

    /// Overwrite the high 4 bits of the packed `regs` byte (the
    /// `src_reg` field), preserving `dst_reg`. Used by the host-side
    /// loader's libbpf-style relocation rewrite to flip a
    /// clang-emitted `BPF_PSEUDO_CALL` into a `BPF_PSEUDO_KFUNC_CALL` after
    /// the kfunc BTF id has been resolved. `pub(crate)` rather than
    /// `pub` because the wire-format invariants for the packed byte
    /// are framework-internal — external callers should construct a
    /// fresh [`BpfInsn::new`] instead of mutating.
    #[inline]
    pub(crate) fn set_src_reg(&mut self, src: u8) {
        self.regs = (self.regs & 0x0f) | ((src & 0x0f) << 4);
    }
}

/// Caller-supplied initial state for one BPF register.
///
/// Used to seed entry-parameter typing or the typed return value of
/// a kfunc. Empty seed lists yield no findings — the analysis only
/// produces output along chains rooted in registers it knows are
/// typed pointers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct InitialReg {
    /// Register index, `0..=9`. `r10` (frame pointer) is rejected
    /// during seeding because [`analyze_casts`] never derives kernel
    /// struct accesses through it.
    pub reg: u8,
    /// BTF type id of the struct the register points at. The id is
    /// peeled through `Ptr` / `Const` / `Volatile` / `Restrict` /
    /// `Typedef` / `TypeTag` / `DeclTag` chains until a `Struct` or
    /// `Union` is reached; if no struct is reachable the seed is
    /// silently ignored.
    pub struct_type_id: u32,
}

/// Function-entry PC paired with the BTF type id of its FuncProto.
///
/// Caller obtains these from `bpf_func_info` in `.BTF.ext` (or the
/// kernel `bpf_prog_info` accessor that exposes the same array).
/// At each PC matching `insn_offset`, the analyzer clears ALL
/// registers (R0..R10) and drops every stack slot (the linear walk
/// concatenates subprograms, so stale R6..R9 from an unrelated
/// preceding function must not leak into this entry), then reseeds
/// R1..R5 from the FuncProto's parameter list:
/// parameter `i` (zero-indexed) becomes `R{i+1}`. Parameters that
/// peel to `Ptr -> Struct/Union` produce `Pointer{struct_id}`;
/// everything else (scalar, void, function pointer, …) leaves the
/// register `Unknown`. A variadic sentinel terminates the parameter
/// scan (everything after it is unreachable in the BPF calling
/// convention); parameters past R5 are skipped silently.
///
/// `func_proto_id` must resolve to `Type::FuncProto` in the BTF
/// passed to [`analyze_casts`]. If `Type::Func` is given by mistake,
/// the analyzer peels one level (Func->FuncProto) and proceeds.
/// Anything else silently disables seeding for that entry — false
/// negatives are the safe direction. All registers and the stack
/// are still cleared in that case so the unrecoverable proto
/// cannot retain stale state.
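///
/// A construction sketch (hypothetical: assumes `func_infos` is the
/// decoded `bpf_func_info` array with `insn_off` already converted to
/// instruction units; `type_id` may be the `BTF_KIND_FUNC` id, which
/// the analyzer peels to its FuncProto):
///
/// ```ignore
/// let entries: Vec<FuncEntry> = func_infos
///     .iter()
///     .map(|fi| FuncEntry {
///         insn_offset: fi.insn_off as usize,
///         func_proto_id: fi.type_id,
///     })
///     .collect();
/// ```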
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct FuncEntry {
    /// Instruction index of the function's first instruction.
    pub insn_offset: usize,
    /// BTF id of the function's prototype (`BTF_KIND_FUNC_PROTO`).
    pub func_proto_id: u32,
}

/// Caller-supplied annotation that flags a `BPF_PSEUDO_CALL` to an
/// in-tree subprog whose return value is a u64 carrying an arena
/// virtual address.
///
/// scx schedulers stash arena pointers in `u64` slots after calling
/// helpers like `scx_static_alloc()` / `scx_alloc_internal()` that
/// return a u64 (NOT a typed pointer). BTF declares the destination
/// field as `u64`, so neither the renderer's [`btf_rs::Type::Ptr`] arm
/// nor the cast analyzer's [`Type::Int`] LDX-shape inference fires —
/// the field looks like a counter. The host-side loader walks
/// [`BPF_PSEUDO_CALL`] sites whose resolved subprog name matches the
/// allocator allowlist and emits one [`SubprogReturn`] per call site.
/// The analyzer applies the annotation while processing the call
/// instruction itself: the BPF ABI clobbers R0..=R5 at the call
/// boundary, and the annotation seeds R0 to
/// [`RegState::ArenaU64FromAlloc`] AFTER the clobber, so the next
/// move/spill/store of R0 carries the tag forward.
///
/// `insn_offset` is the call PC (not PC+1); the analyzer applies the
/// seed inside its [`BPF_OP_CALL`] arm after the standard register
/// clobber, mirroring how [`Self::handle_kfunc_call`] types R0 from
/// the kfunc's FuncProto return type.
///
/// `alloc_size` is the value of the `size` argument (R1) at the call
/// site, captured by the host-side loader for `scx_static_alloc_internal`
/// call sites. The bump allocator emits no per-slot header that the
/// renderer's [`super::btf_render::MemReader::resolve_arena_type`]
/// bridge could index, so the analyzer threads the sizeof argument
/// through to [`CastHit::alloc_size`] for size-based BTF matching at
/// chase time via [`super::sdt_alloc::discover_payload_btf_id`]. `None`
/// for allocators that DO emit a per-slot header (e.g.
/// `scx_alloc_internal`, where the bridge resolves the payload type via
/// the per-slot header) or when the loader could not find a matching
/// `BPF_MOV64_IMM r1, <size>` instruction within the lookback window.
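///
/// A construction sketch (the PC and size are hypothetical values a
/// loader might have captured for one call site):
///
/// ```ignore
/// // `call scx_static_alloc_internal` at insn 120, preceded by an
/// // `r1 = 64` carrying the sizeof argument.
/// let ret = SubprogReturn { insn_offset: 120, alloc_size: Some(64) };
/// ```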
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SubprogReturn {
    /// Instruction index of the `BPF_PSEUDO_CALL` site.
    pub insn_offset: usize,
    /// `sizeof` argument captured at the call site for
    /// `scx_static_alloc_internal` calls. `None` for any other
    /// allocator.
    pub alloc_size: Option<u64>,
}

/// Caller-supplied annotation that ties a `BPF_LD_IMM64` instruction
/// to its target `BTF_KIND_DATASEC` plus the byte offset of the
/// referenced global within that section.
///
/// Pre-relocation `.bpf.o` bytecode (the input the host-side cast
/// loader sees, before libbpf processes ELF relocations) emits
/// `BPF_LD_IMM64` referencing global variables in `.bss`, `.data`,
/// or `.rodata` with `src_reg == 0` and `imm == 0` — the relocation
/// entry in `.rel.<text>` carries the actual section binding. The
/// host-side loader walks `.rel.<text>`, identifies LD_IMM64 PCs
/// whose target is a datasec section, and emits one `DatasecPointer`
/// per such PC. The analyzer applies the annotation in the
/// `BPF_LD_IMM64` arm to set the destination register state to
/// [`RegState::DatasecPointer { datasec_type_id, base_offset }`],
/// which subsequent STX/LDX through the register treat as a typed
/// pointer into the datasec's variable layout. See linux uapi
/// `bpf.h` `BPF_PSEUDO_MAP_VALUE = 2` for the kernel-side encoding.
///
/// `base_offset` is the byte offset of the referenced global within
/// the datasec. For SHT_REL (the BPF convention — clang emits
/// SHT_REL, not SHT_RELA, for BPF object files), `r_addend` is
/// absent; the offset comes from `LD_IMM64 insn.imm +
/// sym.st_value` and the host-side loader populates `base_offset`
/// from those fields.
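///
/// A construction sketch (hypothetical values for one relocation the
/// loader resolved):
///
/// ```ignore
/// // LD_IMM64 at insn 42 references a global 128 bytes into the
/// // .bss datasec, whose BTF_KIND_DATASEC id here is 517.
/// let dp = DatasecPointer {
///     insn_offset: 42,
///     datasec_type_id: 517,
///     base_offset: 128,
/// };
/// ```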
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct DatasecPointer {
    /// Instruction index of the `BPF_LD_IMM64` to annotate.
    pub insn_offset: usize,
    /// BTF id of the `BTF_KIND_DATASEC` type for the referenced
    /// section.
    pub datasec_type_id: u32,
    /// Byte offset of the referenced global within the datasec.
    pub base_offset: u32,
}

/// Address space of a recovered cast target.
///
/// Distinguishes the two detection paths: arena pointers carry an
/// arena virtual address; kernel kptrs carry a kernel virtual
/// address (slab / vmalloc / per-cpu). Both share the same
/// `(source, offset) -> target` shape. The renderer treats
/// `AddrSpace` as a hint — runtime is-arena-window detection on
/// the actual pointer value is authoritative — so a misclassified
/// finding still chases through the correct reader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AddrSpace {
    /// Arena pointer: a `u64` slot whose stored value is an arena
    /// virtual address. Recovered by tracking LDX-through-LoadedU64.
    Arena,
    /// Kernel kptr: a `u64` slot whose stored value is a kernel
    /// virtual address (slab / vmalloc / per-cpu). Recovered by
    /// tracking STX of a typed `Pointer{T}` register.
    Kernel,
}

impl std::fmt::Display for AddrSpace {
    /// Renders as the lowercase address-space tag (`"arena"` /
    /// `"kernel"`) for free-form formatting (error messages, log
    /// lines). The renderer side bypasses `Display` and uses an
    /// exhaustive `match` over the variant set in
    /// `crate::monitor::btf_render::cast_annotation_for` to hand
    /// back static `&'static str` annotations
    /// (`"cast→arena"`, `"cast→arena (sdt_alloc)"`, `"cast→kernel"`,
    /// `"cast→kernel (sdt_alloc)"`) — so the operator-visible
    /// `cast_annotation` field is allocation-free per chase. A
    /// new `AddrSpace` variant added here must also add a row in
    /// `cast_annotation_for`'s match; the compiler enforces it.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            AddrSpace::Arena => "arena",
            AddrSpace::Kernel => "kernel",
        };
        f.write_str(s)
    }
}

/// One recovered cast finding, returned by
/// [`super::btf_render::MemReader::cast_lookup`] to tell the
/// renderer that a `u64` field at
/// `(parent_struct_btf_id, member_byte_offset)` actually carries a
/// pointer to a struct whose BTF id is `target_type_id`. The
/// `addr_space` tag is a HINT from the analyzer; the renderer
/// applies runtime detection on the actual pointer value to pick
/// arena vs kernel chasing.
///
/// `Copy` so the renderer can hand it across helper boundaries
/// without lifetime gymnastics; the type is small.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CastHit {
    /// BTF type id of the recovered target struct/union. `0` means
    /// the analyzer's STX-flow path tagged the slot as Arena WITHOUT
    /// resolving the target type — the renderer's chase path
    /// supplies the real payload BTF id via the
    /// [`super::btf_render::MemReader::resolve_arena_type`] bridge,
    /// or via the [`Self::alloc_size`]-driven static-alloc fallback
    /// when the bridge has no entry.
    pub target_type_id: u32,
    /// Address-space hint from the analyzer (arena vs kernel).
    /// The renderer ignores this for dispatch — runtime
    /// is-arena-window detection on the pointer value drives the
    /// choice — but it is preserved as evidence so an operator can
    /// see whether the analyzer's hint matched what the runtime
    /// chase resolved to.
    pub addr_space: AddrSpace,
    /// Captured `sizeof` argument from the producing
    /// `scx_static_alloc_internal` call site, recorded by the
    /// host-side loader and threaded through the analyzer's
    /// per-slot index (see
    /// [`Analyzer::arena_alloc_size_index`]). Populated only on
    /// arena STX-flow findings whose source register's
    /// [`RegState::ArenaU64FromAlloc`] carried a captured size;
    /// `None` for any other allocator path (kfunc allocator,
    /// `scx_alloc_internal` per-slot-header path, shape-inference,
    /// kernel kptr STX, heuristic-synthesized arena tags) AND for
    /// slots where multiple STX writes recorded disagreeing sizes
    /// (the per-slot index collapsed to `None`).
    ///
    /// At chase time the renderer's
    /// [`super::btf_render::chase_arena_pointer`] consults this
    /// field after the [`super::btf_render::MemReader::resolve_arena_type`]
    /// bridge returns no entry: `Some(n)` triggers a size-based
    /// BTF match via [`super::sdt_alloc::discover_payload_btf_id`],
    /// which is the only resolution path for `scx_static_alloc_internal`
    /// allocations whose bump-allocator emits no per-slot header
    /// the bridge could index.
    pub alloc_size: Option<u64>,
}

/// Output of [`analyze_casts`].
///
/// Maps `(source_btf_type_id, field_byte_offset)` to the recovered
/// target's [`CastHit`]. The map is `BTreeMap` so iteration order
/// is deterministic, which makes test assertions stable without a
/// sort step at every assertion site.
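///
/// A lookup sketch (the ids and offset are illustrative):
///
/// ```ignore
/// // Does struct id 42's u64 field at byte offset 16 carry a
/// // recovered pointer?
/// if let Some(hit) = casts.get(&(42, 16)) {
///     println!("renders as a {} pointer to BTF id {}",
///              hit.addr_space, hit.target_type_id);
/// }
/// ```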
pub type CastMap = BTreeMap<(u32, u32), CastHit>;

/// Maximum BTF type-id the candidate-search loop probes per pattern.
///
/// `btf_rs` does not expose a "list all type ids" iterator. The
/// matcher walks ids `1..=max_observed_id` instead, where
/// `max_observed_id` is the largest id touched during the forward
/// pass plus this slack. Real ktstr program BTFs top out in the low
/// thousands of types; the slack is generous so a struct that only
/// appears in the BTF (not yet referenced by any instruction we
/// processed) can still match. The hard cap
/// [`super::sdt_alloc::MAX_BTF_ID_PROBE`] backstops a pathological /
/// synthesized BTF.
const CANDIDATE_SEARCH_SLACK: u32 = 65_536;

/// Per-register state during the forward walk.
///
/// `PartialEq` participates in [`analyze_casts`]'s fixpoint loop: each
/// pass extracts [`Analyzer::caller_arg_types`] (a
/// `HashMap<usize, [RegState; 5]>`) and compares it against the prior
/// pass's snapshot with `==`. Convergence is detected when the carried
/// maps stop changing, so `RegState` equality is the propagation
/// primitive for the whole fixpoint. All variant payloads (`u32`,
/// `i16`, and the `Option` wrappers) already implement `PartialEq`, so
/// the derive is straightforward.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RegState {
    Unknown,
    /// Register holds a pointer to a known BTF struct.
    Pointer {
        struct_type_id: u32,
    },
    /// Register holds a `u64` value loaded from `(struct,
    /// field_offset)`; the BTF declares the source field as a plain
    /// 8-byte unsigned integer.
    LoadedU64Field {
        source_struct_id: u32,
        field_offset: u32,
    },
    /// Register holds a pointer into a `BTF_KIND_DATASEC` map value
    /// (a `.bss`, `.data`, `.rodata`, or `.data.<name>` global
    /// section), at a known byte offset within that section.
    /// Produced by `BPF_LD_IMM64` with `src_reg ==
    /// BPF_PSEUDO_MAP_VALUE`, where the first instruction slot's
    /// `imm` field carries the byte offset of the referenced global
    /// (added to the section symbol's address, which is 0 for
    /// `STT_SECTION` symbols, so the imm IS the offset).
    ///
    /// An STX through this register with access offset `N` (the
    /// instruction's 16-bit `off` field) writes to
    /// `(datasec_type_id, base_offset + N)`. The offset is resolved
    /// against the datasec's `VarSecinfo` entries via
    /// [`struct_member_at`]: each entry spans `[var.offset(),
    /// var.offset() + var.size())` and points at a `BTF_KIND_VAR`
    /// whose underlying type is the global's actual C type. A
    /// hit on a u64-typed Var triggers the kptr finding path
    /// just like a struct member would.
    ///
    /// See linux uapi `bpf.h` `BPF_PSEUDO_MAP_VALUE = 2` and the
    /// libbpf relocation logic in `tools/lib/bpf/libbpf.c`'s
    /// `bpf_program__resolve_map_value_relos` (libbpf rewrites
    /// `R_BPF_64_64` relocations against `STT_SECTION` symbols on
    /// `.bss`/`.data`/`.rodata` into LD_IMM64 instructions with
    /// `src_reg = BPF_PSEUDO_MAP_VALUE`). The host-side cast loader
    /// does not see post-relocation bytecode (the embedded
    /// `.bpf.objs` blob carries the raw `.bpf.o` ELF), so it
    /// reconstructs the same mapping by walking `.rel.<text>`
    /// sections and emitting [`DatasecPointer`] entries that the
    /// analyzer applies at the same insn PC.
    DatasecPointer {
        datasec_type_id: u32,
        base_offset: u32,
    },
    /// Register holds a `u64` value the analyzer believes is an arena
    /// virtual address — either because it came directly from an
    /// allocator-return seed at a [`SubprogReturn::insn_offset`], OR
    /// because it was loaded from a slot the analyzer previously
    /// tagged as Arena via the STX-flow path (alias-set tracking).
    ///
    /// Distinct from [`Self::LoadedU64Field`]: that variant tracks a
    /// generic u64 whose downstream LDX accesses constrain shape
    /// inference. This variant has stronger evidence (the value came
    /// from an allowlisted allocator OR an already-arena-tagged
    /// field), so the STX of this state into a `u64` field of a
    /// typed `Pointer{P}` parent records `(P, off)` as an Arena cast
    /// finding directly — no shape inference required.
    ///
    /// `source` carries the `(parent_struct_id, field_offset)` slot
    /// the value was loaded from, when known. Populated at the
    /// alias-tracking LDX site in [`Analyzer::handle_ldx`] when an
    /// LDX through a typed `Pointer{P}` reads a u64 field whose slot
    /// is already in [`Analyzer::arena_stx_findings`]. `None` for the
    /// allocator-return seed paths (`SubprogReturn`,
    /// `handle_kfunc_call`, caller_arg_types propagation,
    /// `bridge_map_value_spill`) where the slot identity is recorded
    /// separately into `arena_stx_findings` by the producing site —
    /// the value register itself has no in-frame slot of origin to
    /// attach.
    ///
    /// Consumed by the `RegState::ArenaU64FromAlloc` arm of
    /// [`Analyzer::handle_ldx`]: when `source` is `Some`, the
    /// downstream LDX through this register records the access
    /// pattern `(target_offset, target_size)` against the source slot
    /// in [`Analyzer::patterns`], feeding shape inference at finalize.
    /// `None` skips the recording — false negative on shape
    /// inference is the safe direction.
    ///
    /// The slot identity is meaningful only WITHIN the function whose
    /// LDX produced it: an arena pointer flowing across a
    /// BPF_PSEUDO_CALL into a callee's R1..R5 has no source-slot
    /// meaning in the callee's frame. Cross-function propagation
    /// (caller_arg_types snapshot at every BPF_PSEUDO_CALL, FuncEntry
    /// reseed of R1..R5) strips `source` to `None` so the callee
    /// retains the arena tag without inheriting a stale in-caller
    /// slot identity.
    ///
    /// `alloc_size` is the `sizeof` argument captured at the
    /// `scx_static_alloc_internal` call site by the host-side loader.
    /// `Some(n)` for those allocator-return seeds (and any state that
    /// inherits from one via MOV / spill / alias-tracking from a slot
    /// whose seed carried the size); `None` for `scx_alloc_internal`
    /// and other allocators with a per-slot header that the renderer's
    /// [`super::btf_render::MemReader::resolve_arena_type`] bridge
    /// resolves directly. At STX time, the value rides into the
    /// per-slot alloc-size index alongside `arena_stx_findings`, and at
    /// finalize each emitted [`CastHit`] for the slot carries the
    /// captured size so the renderer's chase path can size-match the
    /// payload BTF type via [`super::sdt_alloc::discover_payload_btf_id`].
    /// Cross-function propagation (caller_arg_types) preserves
    /// `alloc_size` even after stripping `source` — the size is a
    /// property of the producing call site, not the in-caller frame.
    ArenaU64FromAlloc {
        source: Option<(u32, u32)>,
        alloc_size: Option<u64>,
    },
    /// Register holds a frame-pointer-relative address: `r10 + offset`.
    /// Produced by `MOV rX, r10` (offset = 0) and propagated through
    /// `ALU64 | ADD | K` (offset += imm). Consumed by the
    /// `bpf_map_update_elem` handler to identify the stack region
    /// backing the map value struct.
    FrameAddr {
        offset: i16,
    },
}

/// Observed `(offset, size_bytes)` access through a `LoadedU64Field`
/// register. Stored in a set per source `(struct, field)` so duplicate
/// patterns coalesce.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Access {
    offset: u32,
    size: u32,
}

/// Run the cast analysis over `insns` against `btf`. `initial_regs`
/// seeds the entry / known-pointer registers before the walk starts.
/// `func_entries` identifies function-entry PCs whose R1..Rn should
/// be reseeded from a BTF FuncProto's parameter list before the
/// instruction at that PC executes. `datasec_pointers` annotates
/// `BPF_LD_IMM64` PCs that resolve (after libbpf relocation) to a
/// `BPF_PSEUDO_MAP_VALUE` reference into a `.bss` / `.data` /
/// `.rodata` global section — see [`DatasecPointer`]. `subprog_returns`
/// annotates `BPF_PSEUDO_CALL` sites whose resolved subprog name
/// matches the arena-allocator allowlist (e.g. `scx_static_alloc_internal`,
/// `scx_alloc_internal`, `bpf_arena_alloc_pages`); after the standard
/// R0..=R5 clobber the analyzer seeds R0 to
/// [`RegState::ArenaU64FromAlloc`] so the value flows into STX-side
/// arena cast detection. See [`SubprogReturn`].
///
/// `initial_regs`, `func_entries`, `datasec_pointers`, and
/// `subprog_returns` compose: seeds apply once at PC 0, function-entry
/// reseeding applies at every matching `insn_offset`, datasec
/// annotations apply at every matching `BPF_LD_IMM64` PC, and
/// allocator-return seeds apply at every matching `BPF_PSEUDO_CALL`
/// PC. Reseeding clears ALL registers (R0..R10) and drops every stack
/// slot (subprog entry semantics: the callee's frame is fresh, and
/// stale R6..R9 from linearly-preceding unrelated functions must not
/// leak). R1..R5 are then re-seeded from the FuncProto's parameter
/// types where they resolve to struct pointers.
///
/// The plain-helper return arm in [`Analyzer::step`] does not consume
/// caller-supplied annotations — it derives R0's typing from the
/// analyzer's pre-clobber view of R1 (which the existing datasec
/// annotation pipeline already populates with
/// [`RegState::DatasecPointer`] when the LD_IMM64 of R1 targets the
/// `.maps` BTF datasec). No new caller-side annotation list is
/// needed: a `bpf_map_lookup_elem` call site whose R1 is sourced
/// from a `.maps` LD_IMM64 already has all the evidence the arm
/// requires by the time the call is processed.
///
/// The analysis ignores any [`BpfInsn`] it cannot decode (unknown
/// opcode, malformed encoding) — those manifest as false negatives,
/// the safe direction. Empty input or input that produces no
/// typed-pointer-rooted load or store yields an empty [`CastMap`].
///
/// # Fixpoint iteration
///
/// The forward walk is a single linear pass that records two pieces
/// of cross-call evidence as it goes:
/// - At every `BPF_PSEUDO_CALL` site, the caller's view of `R1..R5`
///   keyed by the callee's entry PC ([`Analyzer::caller_arg_types`]).
/// - At every STX of an [`RegState::ArenaU64FromAlloc`] value into a
///   typed `Pointer{P}` parent's u64 field, the slot key
///   ([`Analyzer::arena_stx_findings`]).
///
/// Both pieces of evidence support typing decisions made AT lower PCs
/// than the producer:
/// - Subprograms declared with `u64` parameters that actually carry
///   typed-pointer values (the BPF idiom for arena-pointer subprog
///   args) cannot be typed by the linear walk alone: the caller's
///   typed register state reaches the call site, but on the first
///   pass the analyzer has not yet propagated those types into the
///   callee's register file when the callee body is processed (the
///   linear walk visits the callee's instructions BEFORE the caller's
///   call site if the callee text precedes the caller in the program
///   slab, AND even when the order is reverse, the seed-from-caller
///   logic at the FuncEntry boundary only upgrades a register from
///   `Unknown` — which requires the caller's state to already be
///   recorded).
/// - The alias-tracking arm in [`Analyzer::handle_ldx`] checks
///   `arena_stx_findings` to decide whether an LDX through a
///   previously-arena-tagged slot loads an arena VA. Consumer LDX
///   sites at lower PCs than the producer (a separate subprog whose
///   text precedes the producer's, or a caller that reads the slot
///   before the bridge / STX-flow path tags it) cannot see the
///   evidence on a single pass.
///
/// To recover those typings, [`analyze_casts`] runs the forward walk
/// repeatedly. Each iteration:
/// 1. Builds a fresh [`Analyzer`] (or one pre-populated with the prior
///    iteration's `caller_arg_types` and `arena_stx_findings` via
///    [`Analyzer::with_carryover`]).
/// 2. Seeds entry registers and runs the full forward walk.
/// 3. Extracts the post-walk `caller_arg_types`, `arena_stx_findings`,
///    and `arena_alloc_size_index` via [`Analyzer::into_carryover`].
/// 4. Compares the extracted `arena_stx_findings` and
///    `arena_alloc_size_index` against the prior iteration's. If both
///    are unchanged, the fixpoint has been reached.
///
/// Only `caller_arg_types`, `arena_stx_findings`, and the per-slot
/// `arena_alloc_size_index` carry across passes. The remaining
/// analyzer state — `kptr_findings`, `patterns`,
/// `arena_confirmed`, the register file, the stack-slot map — is
/// rebuilt from scratch each pass. Carrying `arena_stx_findings` is
/// safe because every entry is sourced from direct evidence the slot
/// held an arena VA (an allocator-return seed, an
/// `ARENA_ALLOC_KFUNC_NAMES` allowlist hit, or
/// [`Analyzer::bridge_map_value_spill`] reading an
/// `ArenaU64FromAlloc` stack slot); the map grows monotonically across
/// passes (insertion sites never erase a `Pending` entry) and is
/// bounded above by the number of distinct
/// `(parent_struct_id, field_offset)` slots in the program BTF, so
/// the fixpoint converges in a finite number of iterations.
///
/// The loop is capped at [`MAX_PASSES`] iterations to bound work on
/// programs that resist convergence. The cap matches the BPF
/// verifier's call-depth limit (`MAX_CALL_FRAMES = 8` in linux
/// `kernel/bpf/verifier.h`), since each additional layer of typed-arg
/// propagation corresponds to one more layer of subprog nesting: after
/// 8 passes the `caller_arg_types` snapshot has accumulated every
/// typing reachable along the call-depth axis, so stopping there is a
/// false-negative-safe terminus. Whichever way the loop exits,
/// convergence detected or cap reached, one final run executes with
/// the last carry-over so [`Analyzer::finalize`] sees the full set of
/// findings produced under those maps.
pub fn analyze_casts(
    insns: &[BpfInsn],
    btf: &Btf,
    initial_regs: &[InitialReg],
    func_entries: &[FuncEntry],
    datasec_pointers: &[DatasecPointer],
    subprog_returns: &[SubprogReturn],
) -> CastMap {
    let targets = jump_targets(insns);
    let mut caller_args: CallerArgTypes = HashMap::new();
    let mut arena_stx: ArenaStxFindings = BTreeMap::new();
    let mut alloc_size_idx: ArenaAllocSizeIndex = BTreeMap::new();

    for pass in 0..MAX_PASSES {
        let mut a = if pass == 0 {
            Analyzer::new(btf)
        } else {
            Analyzer::with_carryover(
                btf,
                caller_args.clone(),
                arena_stx.clone(),
                alloc_size_idx.clone(),
            )
        };
        a.seed(initial_regs);
        a.run(
            insns,
            &targets,
            func_entries,
            datasec_pointers,
            subprog_returns,
        );
        let (next_args, next_stx, next_alloc_size) = a.into_carryover();
        // Convergence: both `arena_stx_findings` AND
        // `arena_alloc_size_index` must stabilise. The index is
        // populated alongside the findings map at every STX site, so
        // in practice they converge together — but a STX writing the
        // same slot twice with disagreeing sizes flips the index from
        // `Some(n)` to `None` without affecting `arena_stx_findings`,
        // and we want that flip to trigger another pass so that
        // finalize emits the conservative `None`.
        if next_stx == arena_stx && next_alloc_size == alloc_size_idx && pass > 0 {
            // Converged: re-run one more time with the converged
            // carry-over so finalize() sees a fully populated
            // analyzer. `into_carryover` consumed `a` above, so a
            // fresh analyzer is required to call finalize() on; the
            // run is idempotent under stable carry-over (the carried
            // maps are monotonic and stable here, so every other
            // analyzer state — patterns, kptr_findings,
            // arena_confirmed — is rebuilt to the same shape it had
            // on the iteration that detected convergence).
            let mut final_a = Analyzer::with_carryover(btf, next_args, next_stx, next_alloc_size);
            final_a.seed(initial_regs);
            final_a.run(
                insns,
                &targets,
                func_entries,
                datasec_pointers,
                subprog_returns,
            );
            return final_a.finalize();
        }
        caller_args = next_args;
        arena_stx = next_stx;
        alloc_size_idx = next_alloc_size;
    }

    // Cap reached without convergence: finalize on the last
    // observed carry-over. False negatives on the still-changing
    // tail of the propagation are the safe direction.
    let mut final_a = Analyzer::with_carryover(btf, caller_args, arena_stx, alloc_size_idx);
    final_a.seed(initial_regs);
    final_a.run(
        insns,
        &targets,
        func_entries,
        datasec_pointers,
        subprog_returns,
    );
    final_a.finalize()
}

/// Maximum fixpoint iterations [`analyze_casts`] runs before giving
/// up on convergence. Set to 8 to mirror the BPF verifier's
/// `MAX_CALL_FRAMES` call-depth limit (linux
/// `kernel/bpf/verifier.h`): each additional pass propagates
/// caller-arg typings one nesting level deeper, so 8 passes cover the
/// deepest call graph the verifier accepts. A program that would need
/// pass 9 to converge cannot exist in valid BPF bytecode the analyzer
/// is asked to inspect.
const MAX_PASSES: usize = 8;

/// Per BPF_PSEUDO_CALL site, snapshot of the caller's R1..R5 keyed by
/// the callee's entry PC. See [`Analyzer::caller_arg_types`].
type CallerArgTypes = HashMap<usize, [RegState; 5]>;

/// Per `(parent_struct_id, field_byte_offset)` slot, the arena-STX
/// finding state. See [`Analyzer::arena_stx_findings`].
type ArenaStxFindings = BTreeMap<(u32, u32), ArenaStxEntry>;

/// Per `(parent_struct_id, field_byte_offset)` slot, the captured
/// `sizeof` argument from the producing
/// `scx_static_alloc_internal` call site. Carries across fixpoint
/// passes alongside [`ArenaStxFindings`] so a slot tagged on a later
/// pass still carries the size when it propagates back through
/// alias-tracking. See [`Analyzer::arena_alloc_size_index`].
type ArenaAllocSizeIndex = BTreeMap<(u32, u32), Option<u64>>;

struct Analyzer<'a> {
    btf: &'a Btf,
    regs: [RegState; 11],
    /// Per `(source_struct, field_offset)` set of `(target_offset,
    /// target_size)` accesses observed via the arena LDX path.
    patterns: BTreeMap<(u32, u32), BTreeSet<Access>>,
    /// Direct kptr findings keyed by `(source_struct_id,
    /// field_offset)` (the struct that owns the slot, named
    /// "source" for parity with `patterns` above). Populated by the
    /// STX path when a `Pointer{T}` value register is stored into a
    /// `u64` field. The map's value is the inner struct id `T`.
    /// Conflicting writes (same slot, different `T`) collapse to a
    /// sentinel that finalize() drops — ambiguity is a false
    /// negative, never a false positive.
    kptr_findings: BTreeMap<(u32, u32), KptrEntry>,
    /// Stack-slot map keyed by frame-pointer-relative byte offset
    /// (always negative). STX through r10 saves the source register
    /// state; LDX through r10 restores it. Cleared at every
    /// jump-target PC alongside the register file.
    stack_slots: BTreeMap<i16, RegState>,
    /// Fields confirmed as arena pointers by a `BPF_ADDR_SPACE_CAST`
    /// instruction (code=0xBF, off=1, imm=1). Keyed by
    /// `(source_struct_id, field_byte_offset)`.
    ///
    /// Two roles:
    /// 1. Veto a kptr finding when the same slot was also observed as
    ///    the source of an arena cast (the slot cannot simultaneously
    ///    hold an arena VA and a kernel VA — the conflict drop set
    ///    in [`Self::finalize`] uses this).
    /// 2. Gate the shape-inference path: an entry in
    ///    [`Self::patterns`] alone is not enough evidence to emit an
    ///    Arena cast hit (the LDX-shape inference can match
    ///    coincidentally on schedulers whose program BTF carries
    ///    same-shape unrelated structs). `arena_confirmed` is the
    ///    direct evidence that the value held in the slot was an
    ///    arena pointer — required for the shape-inference emit per
    ///    the F1 hostile-input mitigation. The new STX-flow path
    ///    (see [`Self::arena_stx_findings`]) carries its own evidence
    ///    (allocator-return → field) and emits independently.
    arena_confirmed: BTreeSet<(u32, u32)>,
    /// Fields where an [`RegState::ArenaU64FromAlloc`] register was
    /// stored into a `u64` slot of a typed `Pointer{P}` (or
    /// `DatasecPointer`) parent. Keyed by
    /// `(parent_struct_id, field_byte_offset)`.
    ///
    /// Direct evidence the slot holds an arena VA: the value came
    /// from an allocator return (e.g. `scx_static_alloc()`) or
    /// propagated from another already-arena-tagged slot. Conflicting
    /// cross-path observations (a typed `Pointer{T}` STX into the same
    /// slot, indicating a kernel kptr write) are detected by
    /// [`Self::finalize`]'s conflict-drop set, which cross-references
    /// `arena_stx_findings` keys against `kptr_findings` keys and
    /// rejects the slot from BOTH sides — false positive is
    /// unacceptable, false negative is the safe direction. Within
    /// `arena_stx_findings` itself, all current insertions resolve
    /// to [`ArenaStxEntry::Pending`] (see the enum doc for the
    /// `Conflicting` variant's defensive role).
    arena_stx_findings: BTreeMap<(u32, u32), ArenaStxEntry>,
    /// Captured `sizeof` argument per arena STX slot, populated by
    /// the [`StxValueKind::Arena`] arm of [`Self::handle_stx`]
    /// alongside [`Self::arena_stx_findings`]. Keyed identically:
    /// `(parent_struct_id, field_byte_offset)`. Value `Some(n)` when
    /// the storing register's [`RegState::ArenaU64FromAlloc`] carried
    /// an `alloc_size` (i.e. the value originated at a
    /// `scx_static_alloc_internal` call site captured by the
    /// host-side loader); `None` for any other allocator (including
    /// `scx_alloc_internal` whose payload type the renderer's
    /// [`super::btf_render::MemReader::resolve_arena_type`] bridge
    /// resolves via the per-slot header).
    ///
    /// At [`Self::finalize`] each emitted [`CastHit`] for an arena
    /// STX slot carries the captured size in its
    /// [`CastHit::alloc_size`] field. The chase path consults that
    /// size via [`super::sdt_alloc::discover_payload_btf_id`] when
    /// the bridge returns no entry for the slot — the `static_alloc`
    /// fallback that resolves the payload BTF type by size match.
    ///
    /// Conflicting captures across multiple STX writes to the same
    /// slot collapse to `None` rather than picking one — the slot's
    /// observation became ambiguous and the renderer must fall back
    /// to the bridge or skip with a clear reason. A keyed-but-`None`
    /// entry is distinct from an absent key: absent means no arena
    /// STX captured a size at all.
    arena_alloc_size_index: BTreeMap<(u32, u32), Option<u64>>,
    /// Largest type id touched while resolving sources (struct
    /// pointer types and u64-field source structs). Used to bound
    /// the matcher's id walk below
    /// [`super::sdt_alloc::MAX_BTF_ID_PROBE`].
    max_seen_type_id: u32,
    /// Count of [`SubprogReturn`] / kfunc-allowlist allocator-seed
    /// applications during the forward walk. Incremented every
    /// time the analyzer sets R0 to [`RegState::ArenaU64FromAlloc`]
    /// from either:
    /// - a caller-supplied [`SubprogReturn`] match in the
    ///   `BPF_OP_CALL` arm, OR
    /// - the `ARENA_ALLOC_KFUNC_NAMES` allowlist match in
    ///   [`Self::handle_kfunc_call`].
    ///
    /// Used by [`Self::finalize`] to gate the F4 mitigation warn
    /// (`allocator helpers may need __always_inline`): the warn
    /// must only fire when allocator call sites WERE seen but
    /// produced NO `arena_stx_findings`, which is the actual
    /// "non-inlined helper" signature. Firing when
    /// `arena_stx_findings` is non-empty but `arena_confirmed` is
    /// empty (the prior gate) was too broad — that condition
    /// matches the normal STX-flow path where the allocator IS
    /// inlined and its R0 reaches a STX into a typed slot.
    alloc_seeds_applied: u32,
    /// Pre-call register state for BPF_PSEUDO_CALL sites, keyed by
    /// the callee's entry PC (= call_pc + 1 + insn.imm). When
    /// `seed_from_func_proto` finds a u64 parameter that the
    /// FuncProto doesn't type as Ptr→Struct, it consults this map
    /// to recover the caller's typed Pointer state for that argument
    /// register. Covers the common BPF pattern where subprograms
    /// take `u64 taskc_raw` parameters that are actually typed
    /// arena pointers — the FuncProto declares u64, but the caller
    /// passes a Pointer{T} that the linear walk tracked.
    caller_arg_types: HashMap<usize, [RegState; 5]>,
    /// Reverse mapping from stack slot offset to the
    /// `(value_struct_id, member_offset)` key in `arena_stx_findings`
    /// that the `bridge_map_value_spill` method recorded from that
    /// slot. Used by the spill path to invalidate bridge-derived
    /// findings when a non-arena value overwrites the originating
    /// stack slot.
    bridge_slot_origins: HashMap<i16, (u32, u32)>,
    /// True when the current function has received an
    /// ArenaU64FromAlloc from a SubprogReturn (allocator call).
    /// The bridge uses this as a function-level heuristic: if the
    /// function allocated arena memory, its bpf_map_update_elem
    /// values likely store arena pointers. Cleared at FuncEntry.
    func_has_alloc: bool,
    /// Register state snapshots from conditional branch sources.
    /// Keyed by branch target PC. At each conditional jump, the
    /// current register state is saved for the target. When the
    /// walker reaches the target, it merges: if the fall-through
    /// path left a register Unknown but the branch source had it
    /// typed, the typed value is restored.
    branch_source_regs: HashMap<usize, [RegState; 11]>,
}

/// Kptr finding state: a single `(parent, offset)` slot may be
/// written by code paths that disagree on the target type. The
/// analyzer collapses the disagreement to `Conflicting` so finalize()
/// can drop it.
#[derive(Debug, Clone, Copy)]
enum KptrEntry {
    /// Single observed target type id. Always non-zero in practice —
    /// `Pointer{T}` source registers carry a real BTF type id and the
    /// `Pointer{T}` STX path is the only insertion site for this
    /// variant. (The arena-STX path uses the sibling
    /// [`ArenaStxEntry`] enum, not this one, so a stale "0 means
    /// deferred resolve" reading does not apply here.)
    Single(u32),
    /// Two or more disjoint target ids observed; drop the slot.
    Conflicting,
}

/// Arena STX finding state for [`Analyzer::arena_stx_findings`]. A
/// single `(parent, offset)` slot may be written by an
/// [`RegState::ArenaU64FromAlloc`] STX (records `Pending`) and, in
/// principle, also by a typed-pointer kptr STX (which records into
/// the sibling [`Analyzer::kptr_findings`], not here).
///
/// The variant set is deliberately distinct from [`KptrEntry`]: the
/// arena STX path has no per-finding payload (the renderer's
/// [`super::btf_render::MemReader::resolve_arena_type`] bridge
/// recovers the actual payload BTF type id at chase time), so reusing
/// `KptrEntry::Single(0)` as a "deferred resolve pending" sentinel
/// would conflate two different concepts at the type level. A
/// dedicated enum makes "this slot saw an arena STX" a single
/// variant ([`Self::Pending`]) and keeps `KptrEntry::Single(0)`'s
/// meaning unambiguous in the kptr path.
///
/// `Conflicting` is preserved for symmetry with [`KptrEntry`] and as
/// a defensive landing pad: today the only insertion path for
/// `arena_stx_findings` is the `StxValueKind::Arena` arm of
/// [`Analyzer::handle_stx`], which only ever inserts
/// [`Self::Pending`]. The arena-STX dedup arm in `handle_stx`
/// therefore matches `Some(Self::Pending)` exhaustively as a no-op
/// and treats `Some(Self::Conflicting)` as `unreachable!()`. If a
/// future code path adds a way to record disagreement on the same
/// slot from the arena side, it can use this variant; finalize's
/// filter will drop it identically to today.
///
/// `PartialEq` participates in [`analyze_casts`]'s fixpoint loop
/// alongside [`RegState`]: each pass extracts the analyzer's
/// `arena_stx_findings` map and compares it against the prior pass's
/// snapshot. Convergence is detected when
/// [`Analyzer::arena_stx_findings`] and
/// [`Analyzer::arena_alloc_size_index`] both stop changing, so
/// `ArenaStxEntry` equality is the propagation primitive for the
/// arena-tagging half of the fixpoint. Both variants are unit so the
/// derive is straightforward.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArenaStxEntry {
    /// Allocator-tagged value observed at the slot. The renderer's
    /// [`super::btf_render::MemReader::resolve_arena_type`] bridge
    /// supplies the actual payload BTF type id at chase time — the
    /// analyzer emits `target_type_id == 0` from finalize and the
    /// bridge fills in the real id.
    Pending,
    /// Two or more disagreeing observations — drop the slot.
    /// Unreachable through today's insertion paths but kept for
    /// symmetry with [`KptrEntry::Conflicting`] and as a defensive
    /// terminal so finalize's filter survives a future enrichment
    /// of the arena-STX path. `#[allow(dead_code)]` because no
    /// current insertion site constructs the variant; the
    /// `unreachable!()` in [`Analyzer::handle_stx`]'s arena dedup
    /// arm and the defensive filter in [`Analyzer::finalize`] both
    /// reference it as a pattern only. Removing the variant would
    /// drop the documented design margin and force a churn of the
    /// match shape if a future enrichment ever needs it.
    #[allow(dead_code)]
    Conflicting,
}

/// Discriminator for the value-side state at a STX site that passed
/// the BTF u64 gate. Used by [`Analyzer::handle_stx`] to dispatch the
/// kptr vs arena finding paths from one match arm rather than
/// re-pattern-matching [`RegState`] inside the recording logic.
#[derive(Debug, Clone, Copy)]
enum StxValueKind {
    /// Source register held [`RegState::Pointer`]: record into
    /// [`Analyzer::kptr_findings`].
    Kptr { target: u32 },
    /// Source register held [`RegState::ArenaU64FromAlloc`]: record
    /// into [`Analyzer::arena_stx_findings`]. `alloc_size` carries
    /// the captured `sizeof` argument from the producing
    /// `scx_static_alloc_internal` call site (or `None` for kfunc /
    /// `scx_alloc_internal` / heuristic-synthesized arena tags) so
    /// the per-slot index in
    /// [`Analyzer::arena_alloc_size_index`] can record the size for
    /// the renderer's chase-time BTF size match.
    Arena { alloc_size: Option<u64> },
    /// Source register held neither of the above (Unknown,
    /// LoadedU64Field, DatasecPointer, ...). Invalidates any prior
    /// arena_stx_findings entry for the same slot — the slot is
    /// used for non-arena values on at least one code path, making
    /// the arena observation ambiguous.
    Unknown,
}

impl<'a> Analyzer<'a> {
    fn new(btf: &'a Btf) -> Self {
        Self {
            btf,
            regs: [RegState::Unknown; 11],
            patterns: BTreeMap::new(),
            kptr_findings: BTreeMap::new(),
            stack_slots: BTreeMap::new(),
            arena_confirmed: BTreeSet::new(),
            arena_stx_findings: BTreeMap::new(),
            arena_alloc_size_index: BTreeMap::new(),
            max_seen_type_id: 0,
            alloc_seeds_applied: 0,
            caller_arg_types: HashMap::new(),
            bridge_slot_origins: HashMap::new(),
            func_has_alloc: false,
            branch_source_regs: HashMap::new(),
        }
    }

    /// Construct an analyzer pre-populated with a prior iteration's
    /// `caller_arg_types`, `arena_stx_findings`, and
    /// `arena_alloc_size_index` snapshots, used by [`analyze_casts`]'s
    /// fixpoint loop. The remaining analyzer state is reset to its
    /// [`Self::new`] default. Only these three maps carry across
    /// passes:
    ///
    /// - `caller_arg_types` propagates the caller's tracked register
    ///   state at every BPF_PSEUDO_CALL site so that on a subsequent
    ///   pass the callee's FuncEntry can upgrade its R1..R5 from
    ///   `Unknown` to the typed view the caller provided.
    /// - `arena_stx_findings` propagates the slots that some pass has
    ///   observed receiving an `ArenaU64FromAlloc` STX. The
    ///   alias-tracking arm in [`Self::handle_ldx`] checks this map at
    ///   every typed-Pointer LDX of a u64 field; entries are needed
    ///   when the consumer LDX site appears at a lower PC than the
    ///   bridge / STX-flow producer that originally tagged the slot,
    ///   so the linear walk cannot observe the producer's evidence
    ///   in time. Entries are inserted from real allocator-return
    ///   evidence (BPF_PSEUDO_CALL with [`SubprogReturn`] seed,
    ///   [`Self::handle_kfunc_call`] arena-allocator allowlist hit,
    ///   or [`Self::bridge_map_value_spill`] reading an
    ///   `ArenaU64FromAlloc` stack slot) so carrying them across
    ///   passes is safe — a slot only ends up in this map after the
    ///   analyzer saw direct evidence the slot held an arena VA.
    ///   The map only grows monotonically across passes (insertion
    ///   sites never erase a `Pending` entry), so it converges
    ///   bounded by the number of distinct `(parent_struct_id, field_offset)`
    ///   slots in the program BTF.
    /// - `arena_alloc_size_index` propagates the captured `sizeof`
    ///   argument per arena STX slot. Carries alongside
    ///   `arena_stx_findings` so the alias-tracking LDX arm can
    ///   inherit a previously captured size when the LDXed value
    ///   propagates further. The index keys are a subset of the
    ///   findings keys; on a fresh insert the value is the size
    ///   captured at the producing STX, on subsequent disagreeing
    ///   STXs the index entry collapses to `None` (cleanup of
    ///   ambiguous evidence). Carrying the index across passes is
    ///   safe for the same monotonic-evidence reasons that apply to
    ///   `arena_stx_findings`.
    fn with_carryover(
        btf: &'a Btf,
        caller_arg_types: CallerArgTypes,
        arena_stx_findings: ArenaStxFindings,
        arena_alloc_size_index: ArenaAllocSizeIndex,
    ) -> Self {
        // Seed `max_seen_type_id` from the largest parent struct id
        // present in `arena_stx_findings`. Without this seed, the
        // candidate-search slack in finalize() would be computed
        // against a value smaller than the parent ids the carry-over
        // already references, so [`build_layout_index`] could miss
        // candidate structs whose ids fall above the un-seeded slack
        // window.
        let max_seen_type_id = arena_stx_findings
            .keys()
            .map(|(parent, _)| *parent)
            .max()
            .unwrap_or(0);
        Self {
            btf,
            regs: [RegState::Unknown; 11],
            patterns: BTreeMap::new(),
            kptr_findings: BTreeMap::new(),
            stack_slots: BTreeMap::new(),
            arena_confirmed: BTreeSet::new(),
            arena_stx_findings,
            arena_alloc_size_index,
            max_seen_type_id,
            alloc_seeds_applied: 0,
            caller_arg_types,
            bridge_slot_origins: HashMap::new(),
            func_has_alloc: false,
            branch_source_regs: HashMap::new(),
        }
    }

    /// Extract the post-walk `caller_arg_types`,
    /// `arena_stx_findings`, and `arena_alloc_size_index` maps for
    /// the fixpoint loop. Consumes the analyzer because the maps are
    /// moved out rather than cloned; the caller in [`analyze_casts`]
    /// either detects convergence (and rebuilds a fresh analyzer to
    /// call finalize on) or feeds the extracted maps into a successor
    /// pass via [`Self::with_carryover`].
    fn into_carryover(self) -> (CallerArgTypes, ArenaStxFindings, ArenaAllocSizeIndex) {
        (
            self.caller_arg_types,
            self.arena_stx_findings,
            self.arena_alloc_size_index,
        )
    }

    fn seed(&mut self, initial_regs: &[InitialReg]) {
        for seed in initial_regs {
            // r10 is the BPF frame pointer; never a typed pointer to
            // a kernel struct. Reject so the analysis never derives
            // a cast from it. R0..R9 are the typed-value registers
            // per linux uapi `bpf.h` (BPF_REG_0..BPF_REG_9, with
            // BPF_REG_10 as the FP and `MAX_BPF_REG = 11` as the
            // register-file bound). The `>= BPF_REG_R10` gate
            // rejects both r10 itself and any out-of-range index
            // 11..=15 a malformed seed could carry.
            if (seed.reg as usize) >= BPF_REG_R10 {
                continue;
            }
            // Resolve through Ptr/Typedef/etc. to a Struct. If no
            // struct is reachable, drop silently — false negatives
            // here are acceptable.
            let Some(sid) = super::bpf_map::resolve_to_struct_id(self.btf, seed.struct_type_id)
            else {
                continue;
            };
            self.regs[seed.reg as usize] = RegState::Pointer {
                struct_type_id: sid,
            };
            self.note_type_id(sid);
        }
    }

    /// Reseed R1..R5 from a FuncProto's parameter list. Called at
    /// every PC matching a [`FuncEntry::insn_offset`]. Parameters
    /// past R5 are skipped silently — the BPF ABI passes only the
    /// first five in registers and anything beyond is on the stack
    /// (which the analyzer treats as Unknown unless an explicit
    /// spill writes a typed value).
    ///
    /// `func_proto_id` may resolve to either `Type::FuncProto`
    /// directly or to `Type::Func` (in which case it is peeled one
    /// level to its FuncProto). Anything else disables seeding for
    /// this entry.
    ///
    /// All registers and stack slots are cleared. R1..R5 are then
    /// re-seeded from the FuncProto parameters: a parameter that
    /// peels to `Ptr -> Struct/Union` becomes `Pointer{struct_id}`,
    /// everything else (scalar, void, function pointer) leaves the
    /// register `Unknown` — exactly what the BPF ABI provides at a
    /// function entry. The forward walk concatenates subprogram
    /// instruction streams; a function entry reached by fall-through
    /// from a prior EXIT would inherit stale R6..R9 state from an
    /// unrelated function if those registers were preserved (the
    /// BPF ABI's callee-saved property only holds across a real
    /// CALL, not across a textual concatenation), so the unconditional
    /// clear is the correct safe direction.
    fn seed_from_func_proto(&mut self, func_proto_id: u32) {
        // Pre-clear ALL registers and the stack unconditionally. The
        // linear forward walk concatenates subprogram instruction
        // streams; a function entry reached by fall-through from a
        // prior EXIT would inherit stale R6..R9 state from an
        // unrelated function. BPF ABI says R6..R9 are callee-saved
        // (inherited from the CALLER), but in the linear walk there
        // is no real caller — the prior function is unrelated.
        // Preserving R6..R9 would let stale typed pointers leak
        // across function boundaries, risking false positives. The
        // cost: legitimate call-inherited R6..R9 typing is lost
        // (false negative), which is the safe direction. Even if
        // proto resolution fails below, stale typed-pointer state
        // from a prior function must not survive past this entry.
        //
        // Preserve R0 across FuncEntry when it holds a fresh
        // allocator-return seed. In BPF's concatenated text layout,
        // a call at the end of subprogram A is immediately followed
        // by subprogram B's FuncEntry at PC+1. The SubprogReturn
        // seed sets R0 = ArenaU64FromAlloc{alloc_size} at the call
        // PC; without preservation, the FuncEntry reset at PC+1
        // destroys it before any subsequent instruction can
        // propagate the alloc_size to an STX finding.
        //
        // The match gates on `alloc_size: Some(_)` — an
        // `ArenaU64FromAlloc { alloc_size: None }` seed is NOT
        // preserved. `None` comes from kfunc allocators whose
        // per-slot headers carry the payload BTF id, which the
        // renderer's [`crate::monitor::btf_render::MemReader::resolve_arena_type`]
        // bridge resolves at chase time. Losing the tag across a
        // FuncEntry merely falls back to the existing bridge path
        // (no information lost). Preserving `Some(_)` is the only
        // shape that benefits — the `scx_static_alloc_internal` bump
        // allocator emits no per-slot header, so the captured
        // `alloc_size` is the only payload-type evidence available.
        let saved_r0 = match self.regs[0] {
            r0 @ RegState::ArenaU64FromAlloc {
                alloc_size: Some(_),
                ..
            } => Some(r0),
            _ => None,
        };
        self.regs = [RegState::Unknown; 11];
        if let Some(r0) = saved_r0 {
            self.regs[0] = r0;
        }
        self.stack_slots.clear();
        self.bridge_slot_origins.clear();
        self.func_has_alloc = false;
        let proto = match self.btf.resolve_type_by_id(func_proto_id) {
            Ok(Type::FuncProto(fp)) => fp,
            Ok(Type::Func(f)) => match f.get_type_id() {
                Ok(pid) => match self.btf.resolve_type_by_id(pid) {
                    Ok(Type::FuncProto(fp)) => fp,
                    _ => return,
                },
                Err(_) => return,
            },
            _ => return,
        };
        // Cap at R5 — BPF ABI passes args 1..=5 in registers. R0 is
        // the return slot, R6..R9 are callee-saved, and R10 is the
        // read-only frame pointer; the kernel verifier rejects
        // programs that try to pass more than five register-args,
        // so anything beyond that index is dead BTF.
        for (i, param) in proto.parameters.iter().enumerate().take(5) {
            // Variadic sentinel (name_off=0, type=0) terminates the
            // parameter list — `break` rather than `continue` because
            // any subsequent parameter slot is unreachable in the
            // BPF calling convention (the variadic marker is the
            // proto's logical end). A `continue` here would let a
            // parameter following the variadic sentinel reseed a
            // later register, which contradicts the proto's intent.
            if param.is_variadic() {
                break;
            }
            let Ok(tid) = param.get_type_id() else {
                continue;
            };
            // Only `Ptr -> ... -> Struct/Union` parameters become
            // typed registers; scalars and function pointers are
            // not pointer-like for our purposes.
            // `bpf_map::resolve_to_struct_id` walks Ptr / Const /
            // Typedef / TypeTag / DeclTag / Volatile / Restrict —
            // exactly what kernel param decls use.
            if let Some(sid) = super::bpf_map::resolve_to_struct_id(self.btf, tid) {
                let reg_idx = i + 1; // param 0 -> R1, …, param 4 -> R5.
                self.regs[reg_idx] = RegState::Pointer {
                    struct_type_id: sid,
                };
                self.note_type_id(sid);
            }
        }
    }

    fn run(
        &mut self,
        insns: &[BpfInsn],
        jump_targets: &BTreeSet<usize>,
        func_entries: &[FuncEntry],
        datasec_pointers: &[DatasecPointer],
        subprog_returns: &[SubprogReturn],
    ) {
        // BPF_LD_IMM64 is a two-insn pseudo-instruction. The decoder
        // skips its second slot via this flag.
        let mut skip_next = false;

        // Pre-build (insn_offset -> proto_id list) so per-PC lookup
        // is O(1). Multiple FuncEntry records for the same PC are
        // preserved in input order — `seed_from_func_proto` is
        // idempotent enough that running them in sequence has the
        // same effect as a single seed (the last one's parameters
        // overwrite the earlier ones, matching BPF ABI's
        // "last declaration wins" semantics for duplicate
        // FuncProtos).
        let mut entries_by_pc: std::collections::HashMap<usize, Vec<u32>> =
            std::collections::HashMap::with_capacity(func_entries.len());
        for fe in func_entries {
            entries_by_pc
                .entry(fe.insn_offset)
                .or_default()
                .push(fe.func_proto_id);
        }

        // Pre-build (insn_offset -> (datasec_type_id, base_offset)) so
        // the `BPF_LD_IMM64` arm can apply the annotation in O(1).
        // Duplicates at the same PC keep the last entry's payload —
        // mirrors `entries_by_pc`'s "last write wins" semantics. A
        // genuine collision (two distinct datasecs claiming the same
        // PC) cannot happen with valid input: each LD_IMM64 has at
        // most one relocation, and one relocation resolves to one
        // section symbol. A duplicate annotation entry produced by a
        // caller bug is fail-soft: the analyzer types the register
        // from the last entry and proceeds.
        let mut datasec_by_pc: std::collections::HashMap<usize, (u32, u32)> =
            std::collections::HashMap::with_capacity(datasec_pointers.len());
        for dp in datasec_pointers {
            datasec_by_pc.insert(dp.insn_offset, (dp.datasec_type_id, dp.base_offset));
        }

        // Pre-build the subprog-return seed map so the `BPF_OP_CALL`
        // arm can decide whether to seed R0 to
        // [`RegState::ArenaU64FromAlloc`] in O(1) AND propagate the
        // captured `alloc_size`. Duplicates collapse (calling the
        // same allocator at the same PC twice would be physically
        // impossible — one PC is one instruction). The map's value is
        // the [`SubprogReturn::alloc_size`] payload — `Some(n)` for
        // `scx_static_alloc_internal` calls whose `size` argument the
        // host-side loader recovered, `None` for other allocators or
        // when the lookback failed.
        let mut subprog_returns_by_pc: std::collections::HashMap<usize, Option<u64>> =
            std::collections::HashMap::with_capacity(subprog_returns.len());
        for sr in subprog_returns {
            subprog_returns_by_pc.insert(sr.insn_offset, sr.alloc_size);
        }

        for (pc, insn) in insns.iter().enumerate() {
            // Jump-target reset fires BEFORE skip_next so a JMP
            // that lands mid-LD_IMM64 (malformed but parseable)
            // still clears stale state. Without this ordering,
            // pre-jump register state would survive past the
            // skip into the next valid instruction.
            // Branch-source merge: when a conditional branch saved
            // register state for this target PC, merge with the
            // current state (which reflects the fall-through path).
            // For each register: identical states are left alone; an
            // arena tag already on the fall-through side is kept (it
            // carries alloc_size evidence); otherwise any non-Unknown
            // branch-source state overrides the fall-through view,
            // and an Unknown branch-source state never downgrades it.
            if let Some(src_regs) = self.branch_source_regs.get(&pc) {
                for (i, src_reg) in src_regs.iter().enumerate().take(11) {
                    match (self.regs[i], *src_reg) {
                        (a, b) if a == b => {}
                        (RegState::ArenaU64FromAlloc { .. }, _) => {
                            // Fall-through has arena tag — keep it.
                            // ArenaU64FromAlloc is strictly more
                            // informative than any other non-Unknown
                            // state; losing it drops alloc_size
                            // evidence across branch merges.
                        }
                        (_, typed @ RegState::ArenaU64FromAlloc { .. }) => {
                            self.regs[i] = typed;
                        }
                        (_, typed) if !matches!(typed, RegState::Unknown) => {
                            self.regs[i] = typed;
                        }
                        _ => {}
                    }
                }
            }

            if skip_next {
                skip_next = false;
                continue;
            }

            // Function-entry reseeding runs after the jump-target
            // reset (so a func entry that is also a jump target
            // still gets its parameter types restored) and before
            // step() executes the instruction. Multiple matching
            // entries at the same PC are processed in order — the
            // last one wins, matching how the BPF ABI would behave
            // if a duplicate FuncProto were declared.
            if let Some(protos) = entries_by_pc.get(&pc) {
                for proto_id in protos {
                    self.seed_from_func_proto(*proto_id);
                }
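                // Caller-arg upgrade: when a prior pass recorded the
                // caller's view of R1..R5 for this entry PC, adopt it
                // for any argument register the FuncProto seeding
                // above left `Unknown`. Typed `Pointer` and
                // `ArenaU64FromAlloc` caller states are taken as-is;
                // registers the proto already typed are left alone.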
                if let Some(caller_args) = self.caller_arg_types.get(&pc) {
                    let args = *caller_args;
                    for (i, &caller_state) in args.iter().enumerate() {
                        let reg_idx = i + 1; // R1..R5
                        if matches!(self.regs[reg_idx], RegState::Unknown) {
                            if let RegState::Pointer { struct_type_id } = caller_state {
                                self.regs[reg_idx] = RegState::Pointer { struct_type_id };
                                self.note_type_id(struct_type_id);
                            } else if let r @ RegState::ArenaU64FromAlloc { .. } = caller_state {
                                self.regs[reg_idx] = r;
                            }
                        }
                    }
                }
            }

            // The `BPF_LD_IMM64` arm consults this entry to type the
            // destination register as `Pointer{datasec_type_id}` plus
            // the per-variable base offset. Read here (not inside
            // `step`) so the lookup table does not have to be threaded
            // through helper layers; the LD arm checks `datasec_hit`
            // and falls through to the default Unknown when None.
            let datasec_hit = datasec_by_pc.get(&pc).copied();

            // Allocator-return seed: the `BPF_OP_CALL` arm consults
            // this lookup and, AFTER the standard R0..=R5 clobber, sets
            // R0 to [`RegState::ArenaU64FromAlloc`] when the PC
            // matches a [`SubprogReturn::insn_offset`]. The subsequent
            // STX of R0 (or any propagated copy) into a typed `u64`
            // field of a `Pointer{P}` parent records `(P, off)` as an
            // Arena cast finding. The optional `alloc_size` rides along
            // for `scx_static_alloc_internal` so the renderer can
            // size-match the payload BTF type at chase time.
            // `Some(None)` means a seed applies but no `alloc_size` was
            // captured (any allocator other than
            // `scx_static_alloc_internal`, or a recovery failure);
            // `Some(Some(n))` means seed AND captured size.
            let alloc_seed = subprog_returns_by_pc.get(&pc).copied();

            if insn.code == (BPF_CLASS_JMP | BPF_OP_CALL) && insn.src_reg() == BPF_PSEUDO_CALL {
                let callee_pc = (pc as i64 + 1 + insn.imm as i64) as usize;
                // Snapshot caller's argument registers for the callee.
                // ArenaU64FromAlloc state (including source slot and
                // alloc_size) is preserved across the call boundary —
                // the callee inherits the full arena context.
                let new_args = [
                    self.regs[1],
                    self.regs[2],
                    self.regs[3],
                    self.regs[4],
                    self.regs[5],
                ];
                self.caller_arg_types
                    .entry(callee_pc)
                    .and_modify(|existing| {
                        for (slot, new) in existing.iter_mut().zip(new_args.iter()) {
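                            // Per-slot join across repeat calls to
                            // the same callee: `Unknown` adopts new
                            // evidence, a new `Unknown` never erases
                            // prior evidence, agreement is a no-op,
                            // and two disagreeing typed views collapse
                            // to `Unknown` (conservative: the argument
                            // cannot be trusted as any one type).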
                            match (*slot, *new) {
                                (RegState::Unknown, _) => *slot = *new,
                                (_, RegState::Unknown) => {}
                                (a, b) if a == b => {}
                                _ => *slot = RegState::Unknown,
                            }
                        }
                    })
                    .or_insert(new_args);
            }

            self.step(*insn, &mut skip_next, datasec_hit, alloc_seed);

            // Save register state at conditional branch sources so
            // the target can merge with the fall-through state. The
            // branch guarantees the tested register holds its pre-
            // branch value on the taken path; the fall-through may
            // clobber it.
            let class = insn.code & 0x07;
            let op = insn.code & 0xf0;
            if (class == BPF_CLASS_JMP || class == BPF_CLASS_JMP32)
                && op != BPF_OP_CALL
                && op != 0x00 // JA (unconditional)
                && insn.code != 0x06
            // goto32 (unconditional)
            {
                let target = (pc as i64 + 1 + insn.off as i64) as usize;
                self.branch_source_regs
                    .entry(target)
                    .and_modify(|existing| {
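                        // Same join as the caller-arg snapshot merge
                        // above: first typed evidence fills `Unknown`,
                        // a later `Unknown` never erases it, and
                        // disagreeing snapshots collapse to `Unknown`.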
                        for (slot, new) in existing.iter_mut().zip(self.regs.iter()) {
                            match (*slot, *new) {
                                (RegState::Unknown, typed) => *slot = typed,
                                (_, RegState::Unknown) => {}
                                (a, b) if a == b => {}
                                _ => *slot = RegState::Unknown,
                            }
                        }
                    })
                    .or_insert(self.regs);
            }

            // Dead-code disambiguation barrier: after an EXIT or
            // unconditional JA/gotol, the NEXT linear PC is
            // unreachable along this control-flow path. If the
            // walker continues processing it (because pc+1 is not
            // itself a jump target — e.g. it is part of an
            // unrelated subprogram concatenated after this one),
            // any RegState/stack_slot we leave in place would leak
            // into that unrelated subprogram's analysis and produce
            // false positives. Reset preemptively unless pc+1 is a
            // jump target (in which case the head-of-loop reset at
            // the next iteration already fires).
            let class = insn.code & 0x07;
            let op = insn.code & 0xf0;
            let unconditional_ja =
                (class == BPF_CLASS_JMP || class == BPF_CLASS_JMP32) && op == 0x00;
            let is_exit = class == BPF_CLASS_JMP && op == BPF_OP_EXIT;
            if (is_exit || unconditional_ja) && !jump_targets.contains(&(pc + 1)) {
                self.regs = [RegState::Unknown; 11];
                self.stack_slots.clear();
                self.bridge_slot_origins.clear();
                self.func_has_alloc = false;
            }
        }
    }

    fn step(
        &mut self,
        insn: BpfInsn,
        skip_next: &mut bool,
        datasec_hit: Option<(u32, u32)>,
        alloc_seed: Option<Option<u64>>,
    ) {
        let class = insn.code & 0x07;
        let dst = insn.dst_reg() as usize;
        let src = insn.src_reg() as usize;

        // BPF reg fields are 4-bit (0..=15) per the BpfInsn
        // encoding, but only 0..=10 are valid registers. A
        // malformed instruction stream could carry 11..=15;
        // reject early so subsequent direct array indexing of
        // self.regs[dst] / self.regs[src] cannot panic. Mirrors
        // the bounds gate in `set_reg()`.
        if dst >= self.regs.len() || src >= self.regs.len() {
            return;
        }

        match class {
            BPF_CLASS_LDX => {
                let mode = insn.code & 0xe0;
                let size = insn.code & 0x18;
                // BPF_MEM (0x60) is the plain "load size bytes" mode.
                // BPF_MEMSX is a sign-extended load — does not
                // produce a u64 we care about. BPF_ATOMIC stores
                // through dst rather than loading. BPF_PROBE_MEM (0x20) and
                // friends are post-verifier markers (see linux
                // include/linux/filter.h) that never appear in
                // pre-verification bytecode the analyzer consumes;
                // treating them as Unknown is the safe direction.
                if mode != BPF_MODE_MEM {
                    self.set_reg(dst, RegState::Unknown);
                    return;
                }
                self.handle_ldx(dst, src, size, insn.off as i32);
            }
            BPF_CLASS_STX => {
                let mode = insn.code & 0xe0;
                let size = insn.code & 0x18;
                // BPF_MEM (0x60): plain store, the spill / kptr
                // path. BPF_ATOMIC (0xc0): read-modify-write — XCHG
                // and CMPXCHG overwrite a register with the OLD
                // memory value, which we model by clobbering the
                // affected register so a stale typed-pointer state
                // does not survive into the post-atomic flow. We do
                // NOT record a kptr finding for atomic ops: their
                // store semantics differ (XCHG returns the prior
                // value into src; CMPXCHG conditionally writes), so
                // attributing a `Pointer{T}` source to the slot is
                // unsafe. BPF_PROBE_* mode bits are post-verifier
                // markers and never appear in pre-verification
                // bytecode (see linux include/linux/filter.h).
                if mode == BPF_MODE_ATOMIC {
                    self.handle_atomic(dst, src, insn.imm, insn.off);
                    return;
                }
                if mode != BPF_MODE_MEM {
                    return;
                }
                self.handle_stx(dst, src, size, insn.off);
            }
            BPF_CLASS_ST => {
                // BPF_ST writes an immediate to memory. The constant
                // is never a typed pointer — but the store may still
                // alias a stack slot we are tracking through STX.
                // Invalidate the slot (write of an immediate
                // overwrites whatever typed value used to live
                // there) so a later LDX r10-relative does not
                // resurrect a stale Pointer state.
                let mode = insn.code & 0xe0;
                if mode == BPF_MODE_MEM && dst == BPF_REG_R10 {
                    self.stack_slots.remove(&insn.off);
                }
            }
            BPF_CLASS_LD => {
                // BPF_LD_IMM64: BPF_LD | BPF_DW | BPF_IMM (0x18).
                // Two-slot instruction: the next BpfInsn carries
                // the upper 32 bits of the 64-bit immediate. The
                // first slot's `imm` is one of: a literal 64-bit
                // constant (src_reg == 0), a map fd
                // (src_reg == BPF_PSEUDO_MAP_FD), or a pointer to
                // a map's value memory at a known offset
                // (src_reg == BPF_PSEUDO_MAP_VALUE) — the case
                // this arm types as `DatasecPointer`. See linux
                // uapi `bpf.h` `BPF_PSEUDO_MAP_*` and `kernel/bpf/
                // syscall.c bpf_check`.
                if insn.code == (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM) {
                    // BPF_PSEUDO_MAP_VALUE branch: the loaded
                    // value is a pointer into a map's value memory
                    // at a known byte offset. The host-side cast
                    // loader passes a per-PC `(datasec_type_id,
                    // base_offset)` annotation when the map is a
                    // global section (`.bss`/`.data`/`.rodata`/
                    // `.data.<name>`); the LD_IMM64's `imm` field
                    // already carries the per-variable byte offset
                    // (the relocation entry against the section
                    // symbol contributes 0 to the addend, so the
                    // raw imm IS the offset). Type the destination
                    // register as `DatasecPointer` so subsequent
                    // STX/LDX through it can resolve to a specific
                    // global variable via `struct_member_at` over
                    // the datasec's `VarSecinfo` entries.
                    //
                    // The `src_reg == BPF_PSEUDO_MAP_VALUE` gate
                    // matches POST-relocation bytecode — what
                    // libbpf produces in-kernel before the
                    // verifier runs. The host-side cast loader
                    // sees PRE-relocation bytecode (the embedded
                    // `.bpf.objs` blob is the raw `.bpf.o`), where
                    // src_reg == 0 even for map_value references.
                    // The loader carries the relocation evidence
                    // via `datasec_hit` instead, so this arm fires
                    // on either a real PSEUDO_MAP_VALUE src_reg
                    // OR a caller-supplied datasec annotation —
                    // whichever path the input provides.
                    if let Some((datasec_type_id, base_offset)) = datasec_hit {
                        self.set_reg(
                            dst,
                            RegState::DatasecPointer {
                                datasec_type_id,
                                base_offset,
                            },
                        );
                        self.note_type_id(datasec_type_id);
                    } else {
                        // Every other LD_IMM64 shape collapses to
                        // Unknown for the destination register:
                        //   - `src_reg == BPF_PSEUDO_MAP_VALUE`
                        //     without a caller annotation: post-
                        //     relocation bytecode whose map_fd
                        //     alone does not identify a datasec
                        //     (the mapping lives in the loader's
                        //     `.maps` parser). Drop dst — false
                        //     negative is the safe direction.
                        //   - plain LD_IMM64 (literal constant,
                        //     map_fd, BTF id, …): the destination
                        //     receives a 64-bit immediate, never
                        //     a typed kernel pointer the renderer
                        //     needs to chase.
                        self.set_reg(dst, RegState::Unknown);
                    }
                    *skip_next = true;
                } else {
                    // Other BPF_LD modes (BPF_ABS, BPF_IND) load
                    // packet data into r0 — not relevant here.
                    self.set_reg(0, RegState::Unknown);
                }
            }
            BPF_CLASS_ALU64 | BPF_CLASS_ALU => {
                let op = insn.code & 0xf0;
                let src_kind = insn.code & 0x08;
                if op == BPF_OP_MOV && src_kind == BPF_SRC_X {
                    // r_dst = r_src — propagate state. Only
                    // ALU64|MOV preserves a 64-bit value verbatim.
                    // 32-bit MOV on a u64 field would truncate the
                    // pointer; treat 32-bit as Unknown to avoid
                    // false positives.
                    if class == BPF_CLASS_ALU64 {
                        // ALU64|MOV|X reuses the `off` field to
                        // encode sign-extending MOV (off in {8, 16,
                        // 32}) and BPF_ADDR_SPACE_CAST (off == 1)
                        // per linux kernel/bpf/verifier.c
                        // check_alu_op. off == 0 is the plain copy.
                        // R10 is the read-only frame pointer —
                        // never a valid MOV/cast destination.
                        // Reject to maintain the invariant that
                        // regs[10] is always Unknown.
                        if dst == BPF_REG_R10 {
                            return;
                        }
                        match insn.off {
                            0 => {
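                                // Plain 64-bit copy: propagate the
                                // tracked state verbatim. A copy of
                                // r10 becomes a frame address at
                                // offset 0 so pointer arithmetic and
                                // spills through the copy are tracked
                                // like r10 itself.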
                                if src == BPF_REG_R10 {
                                    self.regs[dst] = RegState::FrameAddr { offset: 0 };
                                } else {
                                    self.regs[dst] = self.regs[src];
                                }
                            }
                            1 => {
                                // BPF_ADDR_SPACE_CAST. The verifier
                                // (kernel/bpf/verifier.c
                                // check_alu_op) accepts only
                                // imm == 1 (cast as(1) → as(0):
                                // arena → kernel) and
                                // imm == 1u<<16 (kernel → arena).
                                // For arena → kernel, propagate the
                                // source's RegState so subsequent
                                // dereferences attribute correctly.
                                if insn.imm == 1 {
                                    if let RegState::LoadedU64Field {
                                        source_struct_id,
                                        field_offset,
                                    } = self.regs[src]
                                    {
                                        self.arena_confirmed
                                            .insert((source_struct_id, field_offset));
                                    }
                                    self.regs[dst] = self.regs[src];
                                } else if insn.imm == (1 << 16) {
                                    if let RegState::LoadedU64Field {
                                        source_struct_id,
                                        field_offset,
                                    } = self.regs[src]
                                    {
                                        self.arena_confirmed
                                            .insert((source_struct_id, field_offset));
                                        self.regs[dst] = self.regs[src];
                                    } else if matches!(
                                        self.regs[src],
                                        RegState::Pointer { .. }
                                            | RegState::ArenaU64FromAlloc { .. }
                                    ) {
                                        self.regs[dst] = self.regs[src];
                                    } else {
                                        self.set_reg(dst, RegState::Unknown);
                                    }
                                } else {
                                    self.set_reg(dst, RegState::Unknown);
                                }
                            }
                            8 | 16 | 32 => {
                                // Sign-extending MOV (s8/s16/s32 →
                                // s64). A typed pointer cannot
                                // survive sign extension; drop dst.
                                self.set_reg(dst, RegState::Unknown);
                            }
                            _ => {
                                // Unknown/reserved off encoding —
                                // be conservative.
                                self.set_reg(dst, RegState::Unknown);
                            }
                        }
                    } else {
                        self.set_reg(dst, RegState::Unknown);
                    }
                } else if class == BPF_CLASS_ALU64
                    && op == BPF_OP_ADD
                    && (insn.code & 0x08) == 0
                    && let RegState::FrameAddr { offset } = self.regs[dst]
                {
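                    // ALU64 ADD-immediate onto a tracked frame
                    // address: fold the immediate into the offset
                    // (saturating add, then narrowed back to i16) so
                    // a later STX/LDX through the derived pointer
                    // still resolves to the right stack slot; an
                    // offset that does not fit i16 drops the
                    // register to Unknown.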
                    let new_off = (offset as i32).saturating_add(insn.imm);
                    if let Ok(narrowed) = i16::try_from(new_off) {
                        self.regs[dst] = RegState::FrameAddr { offset: narrowed };
                    } else {
                        self.set_reg(dst, RegState::Unknown);
                    }
                } else {
                    self.set_reg(dst, RegState::Unknown);
                }
            }
            BPF_CLASS_JMP | BPF_CLASS_JMP32 => {
                let op = insn.code & 0xf0;
                if op == BPF_OP_CALL {
                    // BPF_CALL clobbers r0..=r5 per the BPF ABI:
                    // r1..r5 are call args (consumed), r0 carries
                    // the return value. Save R1 BEFORE the clobber
                    // so the helper-return arm below can resolve
                    // the map descriptor argument for
                    // `bpf_map_lookup_elem`. Once R0..R5 are
                    // cleared, R1's pre-call state is gone — only
                    // the saved snapshot survives across the
                    // clobber boundary.
                    let pre_call_r1 = self.regs[1];
                    let pre_call_r3 = self.regs[3];
                    for r in 0..=5 {
                        self.set_reg(r, RegState::Unknown);
                    }
                    let pseudo = insn.src_reg();
                    if pseudo == BPF_PSEUDO_KFUNC_CALL {
                        // kfunc calls (src_reg == BPF_PSEUDO_KFUNC_CALL):
                        // the imm field carries the kernel BTF id of
                        // the kfunc — if its FuncProto return type
                        // peels through Ptr to a Struct/Union, set r0
                        // to a typed pointer. Mutually exclusive with
                        // the plain-helper arm below: kfuncs use a
                        // distinct pseudo selector (see linux uapi
                        // `bpf.h`: `BPF_PSEUDO_KFUNC_CALL = 2`).
                        self.handle_kfunc_call(insn.imm);
                    } else if pseudo == 0
                        && (insn.imm == BPF_FUNC_MAP_LOOKUP_ELEM
                            || insn.imm == BPF_FUNC_MAP_LOOKUP_PERCPU_ELEM)
                    {
                        // Plain-helper arm. `pseudo == 0` is the
                        // helper-call form (linux uapi `bpf.h`:
                        // `BPF_PSEUDO_CALL = 1` is BPF-to-BPF;
                        // `BPF_PSEUDO_KFUNC_CALL = 2` is kfunc; the
                        // verifier treats `src_reg == 0` as a kernel
                        // helper-id call). `imm` is the helper id
                        // (`BPF_FUNC_*`); the analyzer types R0 only
                        // for `bpf_map_lookup_elem` (helper id 1) and
                        // its per-CPU variant
                        // `bpf_map_lookup_percpu_elem` — no other
                        // helper has a pointer-to-struct return shape
                        // we can resolve from the BPF program BTF
                        // alone. The map descriptor lives
                        // in R1 at the call site (per
                        // `bpf_map_lookup_elem_proto::arg1_type =
                        // ARG_CONST_MAP_PTR` in linux
                        // `kernel/bpf/helpers.c`); the saved
                        // pre-clobber state above carries the
                        // analyzer's pre-call view.
                        //
                        // Only fires when the saved R1 is a
                        // [`RegState::DatasecPointer`] into a
                        // `BTF_KIND_DATASEC` named `.maps` (the libbpf
                        // user-space BTF map declaration section),
                        // and the map's BTF def carries a `value`
                        // member whose type peels to `Ptr -> Struct/
                        // Union`. Stat-counter maps (`__type(value,
                        // u64)`) drop here — their value type is not
                        // a struct so [`map_value_struct_id`]
                        // returns None. False-negative is the safe
                        // direction.
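                        //
                        // Illustrative shape (names are placeholders,
                        // not from any particular scheduler): a libbpf
                        // map declared as
                        //
                        //     struct {
                        //         __uint(type, BPF_MAP_TYPE_HASH);
                        //         __type(key, u32);
                        //         __type(value, struct task_ctx);
                        //     } task_ctxs SEC(".maps");
                        //
                        // gives `bpf_map_lookup_elem(&task_ctxs, &key)`
                        // a `struct task_ctx *` return, so R0 becomes
                        // `Pointer{task_ctx}` here; a
                        // `__type(value, u64)` map falls through
                        // untyped.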
                        if let RegState::DatasecPointer {
                            datasec_type_id,
                            base_offset,
                        } = pre_call_r1
                            && let Some(sid) =
                                map_value_struct_id(self.btf, datasec_type_id, base_offset)
                        {
                            self.regs[0] = RegState::Pointer {
                                struct_type_id: sid,
                            };
                            self.note_type_id(sid);
                        }
                    } else if pseudo == 0
                        && insn.imm == BPF_FUNC_MAP_UPDATE_ELEM
                        && let RegState::DatasecPointer {
                            datasec_type_id,
                            base_offset,
                        } = pre_call_r1
                        && let Some(value_sid) =
                            map_value_struct_id(self.btf, datasec_type_id, base_offset)
                        && let RegState::FrameAddr { offset: r3_base } = pre_call_r3
                    {
                        self.bridge_map_value_spill(value_sid, r3_base);
                    }
                    // Allocator-return seed: caller-supplied annotation
                    // identified this `BPF_PSEUDO_CALL` PC as a call to
                    // an arena-allocator subprog (see [`SubprogReturn`]).
                    // After the standard R0..=R5 clobber, type R0 as
                    // [`RegState::ArenaU64FromAlloc`] so the next
                    // STX of R0 (or its propagation through MOV /
                    // stack spill / LDX of an already-arena-tagged
                    // slot) records `(parent, off)` as an Arena cast
                    // finding via [`Self::handle_stx`]. The seed is
                    // applied AFTER the clobber so a same-PC kfunc-
                    // call seed (which sets R0 to a typed `Pointer{T}`)
                    // wins on the rare programs where both annotations
                    // resolve to the same call site — kfunc returns
                    // are stronger evidence than the allocator
                    // allowlist.
                    if let Some(captured_alloc_size) = alloc_seed
                        && matches!(self.regs[0], RegState::Unknown)
                    {
                        // Allocator-return seed has no source slot:
                        // the value was synthesized by the allocator,
                        // not loaded from a slot. The downstream STX
                        // of R0 records `(parent, off)` against the
                        // STORE site's slot via `handle_stx`, not
                        // through this register's source field.
                        // `captured_alloc_size` rides on the register
                        // state: `Some(n)` for `scx_static_alloc_internal`
                        // (no per-slot header → renderer needs the size
                        // for BTF matching), `None` for allocators with
                        // a per-slot header that the bridge resolves.
                        self.regs[0] = RegState::ArenaU64FromAlloc {
                            source: None,
                            alloc_size: captured_alloc_size,
                        };
                        // F4 telemetry: bump the seed-applied
                        // counter so [`Self::finalize`] can
                        // distinguish "we saw allocator call
                        // sites but no slot got tagged" (the
                        // non-inlined-helper signature) from
                        // "no allocator was ever called". A
                        // saturating add keeps the count bounded
                        // for pathological inputs that loop a
                        // call site (the verifier rejects such
                        // programs but the analyzer must not
                        // panic on them).
                        self.alloc_seeds_applied = self.alloc_seeds_applied.saturating_add(1);
                        self.func_has_alloc = true;
                    }
                }
                // EXIT, JA, conditional jumps: no state change at
                // the current PC. Branch / fall-through joins are
                // handled by the jump-target reset at the head of
                // each PC the pre-pass flagged.
            }
            _ => {
                // Unknown class — drop dst conservatively.
                self.set_reg(dst, RegState::Unknown);
            }
        }
    }

    fn handle_ldx(&mut self, dst: usize, src: usize, size: u8, off: i32) {
        // Bounds: BPF reg fields are 4-bit (0..=15) but only 0..=10
        // are real registers. A malformed instruction stream could
        // carry 11..=15 here; reject before the direct
        // self.regs[src] read on the typed-base path. Mirrors the
        // gate in `step()` but defends `handle_ldx` independently
        // in case a future caller skips that gate.
        if dst >= self.regs.len() || src >= self.regs.len() {
            return;
        }
        // R10 is the read-only frame pointer — loading INTO r10
        // violates the BPF ABI (verifier rejects). Guard to
        // maintain the invariant that regs[10] is always Unknown,
        // matching the MOV guard in step().
        if dst == BPF_REG_R10 {
            return;
        }
        let size_bytes = ldx_size_bytes(size);

        // Stack reload: r_dst = *(size *)(r10 + off). Restore the
        // slot's saved RegState. Only BPF_DW reloads carry pointer
        // values intact; sub-word reloads truncate so we mark
        // Unknown. The frame-pointer base is r10; src register here
        // is the address-base, dst is the value receiver. Negative
        // off identifies a spill slot (stack grows down); a
        // non-negative off through r10 is outside the frame and
        // rejected by the verifier — drop conservatively rather
        // than guess.
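        //
        // Illustrative round-trip (offsets hypothetical):
        //
        //   *(u64 *)(r10 - 16) = r7    ; handle_stx: stack_slots[-16] = regs[7]
        //   ...
        //   r8 = *(u64 *)(r10 - 16)    ; here: regs[8] = stack_slots[-16]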
        if src == BPF_REG_R10 {
            if size != BPF_SIZE_DW || off >= 0 {
                self.set_reg(dst, RegState::Unknown);
                return;
            }
            // i32 -> i16: BpfInsn::off is i16 to begin with, the
            // caller widens it to i32. Round-trip to the slot key
            // type. Out-of-range is impossible because the source
            // is i16, but guard anyway.
            let Ok(slot_off) = i16::try_from(off) else {
                self.set_reg(dst, RegState::Unknown);
                return;
            };
            let restored = self.stack_slots.get(&slot_off).copied();
            self.set_reg(dst, restored.unwrap_or(RegState::Unknown));
            return;
        }

        // Compute (parent_btf_id, base_offset) for the load
        // target. Two RegState variants reach the typed-LDX path:
        // `Pointer{struct}` (the kernel-driver case) and
        // `DatasecPointer{datasec, base}` (the BSS / data global
        // case where the LD_IMM64's baked-in offset contributes
        // to the effective field offset). Both share the same
        // member-resolution + u64/Ptr-detection shape; only the
        // parent BTF id selects which layout `struct_member_at`
        // walks.
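        //
        // e.g. (illustrative): `DatasecPointer{.bss, base 0x40}` with
        // an LDX at insn off +0x08 resolves the member lookup at
        // field offset 0x48 within the datasec layout.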
        let typed_base: Option<(u32, u32)> = match self.regs[src] {
            RegState::Pointer { struct_type_id } => Some((struct_type_id, 0)),
            RegState::DatasecPointer {
                datasec_type_id,
                base_offset,
            } => Some((datasec_type_id, base_offset)),
            _ => None,
        };
        if let Some((parent_btf_id, base_offset)) = typed_base {
            let insn_off = match field_byte_offset(off) {
                Some(o) => o,
                None => {
                    self.set_reg(dst, RegState::Unknown);
                    return;
                }
            };
            let Some(field_off) = base_offset.checked_add(insn_off) else {
                // Overflow on pathological large base + insn off:
                // drop conservatively. False negative is safe.
                self.set_reg(dst, RegState::Unknown);
                return;
            };
            if let Some(member) = struct_member_at(self.btf, parent_btf_id, field_off) {
                let member_type_id = member.member_type_id();
                let resolved = super::btf_render::peel_modifiers(self.btf, member_type_id);
                // Canonical key for the pattern map: for Datasec
                // members, key on the variable's start offset so
                // the renderer's `(parent, member_offset)` lookup
                // matches `VarSecinfo` boundaries. For struct
                // members, the queried offset IS the member start.
                let (canonical_parent, canonical_field_off) = match &member {
                    MemberAt::Struct {
                        resolved_parent_type_id,
                        resolved_member_offset,
                        ..
                    } => (*resolved_parent_type_id, *resolved_member_offset),
                    MemberAt::Datasec {
                        var_byte_offset, ..
                    } => (parent_btf_id, *var_byte_offset),
                };
                match (size_bytes, resolved) {
                    // Ptr field directly -- BTF already typed.
                    (Some(8), Some(Type::Ptr(p))) => {
                        if let Ok(pointee) = p.get_type_id()
                            && let Some(sid) =
                                super::bpf_map::resolve_to_struct_id(self.btf, pointee)
                        {
                            self.set_reg(
                                dst,
                                RegState::Pointer {
                                    struct_type_id: sid,
                                },
                            );
                            self.note_type_id(sid);
                            return;
                        }
                        self.set_reg(dst, RegState::Unknown);
                    }
                    // Plain u64 field -- THIS is the cast target.
                    (Some(8), Some(Type::Int(int))) => {
                        if int.size() == 8 && !int.is_signed() && !int.is_bool() && !int.is_char() {
                            // Alias-set tracking: when LDX reads from
                            // a `(parent, off)` slot the analyzer
                            // previously tagged via the STX-flow
                            // arena path (see
                            // [`Self::arena_stx_findings`]), the
                            // loaded value is itself an arena VA. Set
                            // the destination state to
                            // [`RegState::ArenaU64FromAlloc`] carrying
                            // the slot identity so the tag propagates
                            // through subsequent moves / spills /
                            // stores AND a downstream LDX through the
                            // tagged register can record its access
                            // pattern in [`Self::patterns`] against
                            // the source slot. Without the
                            // re-typing, a re-stored value that is
                            // an arena VA from a previously-tagged
                            // slot would lose the
                            // [`StxValueKind::Arena`] dispatch in
                            // [`Self::handle_stx`], and the slot
                            // would never be added to
                            // [`Self::arena_stx_findings`] on a
                            // re-store path. Falls through to the
                            // generic [`RegState::LoadedU64Field`]
                            // shape when the slot has not been
                            // arena-tagged yet — the first STX that
                            // tags the slot populates the index,
                            // after which later LDXs through the
                            // same slot inherit.
                            //
                            // Using [`BTreeMap::contains_key`]
                            // without inspecting the
                            // [`ArenaStxEntry`] variant is
                            // intentional: any entry — `Pending`
                            // or (today unreachable) `Conflicting`
                            // — proves the slot saw an arena STX
                            // somewhere in the program, which is
                            // the only signal alias-tracking
                            // needs. A future `Conflicting` would
                            // still be arena-shaped (the conflict
                            // would be across paths that all wrote
                            // an arena pointer); finalize would
                            // drop the slot from the cast map but
                            // the LDX value loaded out of it is
                            // still an arena VA.
                            //
                            // The source slot recorded in the
                            // [`RegState::ArenaU64FromAlloc`]
                            // variant lets the
                            // [`RegState::ArenaU64FromAlloc`] arm
                            // in this function record the
                            // downstream LDX's `(target_offset,
                            // target_size)` against the source
                            // slot's [`Self::patterns`] entry.
                            // Shape inference at finalize then
                            // overwrites the STX-flow's
                            // deferred-resolve sentinel
                            // (`target_type_id == 0`) with a
                            // concrete BTF id when the access
                            // pattern resolves to a unique struct
                            // (cgx_raw / llcx_raw in
                            // `lib/cgroup_bw.bpf.c`'s
                            // `scx_static_alloc()`-backed
                            // pointers).
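                            //
                            // Illustrative flow (offsets hypothetical):
                            //
                            //   r1 = *(u64 *)(r6 + 0x10)  ; (P, 0x10) already in
                            //                             ; arena_stx_findings →
                            //                             ; r1 = ArenaU64FromAlloc{source: (P, 0x10)}
                            //   r2 = *(u64 *)(r1 + 0x08)  ; access {off 0x08, size 8}
                            //                             ; recorded against (P, 0x10)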
                            let dst_state = if self
                                .arena_stx_findings
                                .contains_key(&(canonical_parent, canonical_field_off))
                            {
                                // Inherit the slot's previously
                                // captured `alloc_size` (when a STX
                                // through the same key recorded one)
                                // so a later STX of the LDXed value
                                // into another slot can still
                                // surface the size. Absent index
                                // entry → `None`.
                                let inherited_size = self
                                    .arena_alloc_size_index
                                    .get(&(canonical_parent, canonical_field_off))
                                    .copied()
                                    .flatten();
                                RegState::ArenaU64FromAlloc {
                                    source: Some((canonical_parent, canonical_field_off)),
                                    alloc_size: inherited_size,
                                }
                            } else {
                                RegState::LoadedU64Field {
                                    source_struct_id: canonical_parent,
                                    field_offset: canonical_field_off,
                                }
                            };
                            self.set_reg(dst, dst_state);
                            self.note_type_id(canonical_parent);
                            self.patterns
                                .entry((canonical_parent, canonical_field_off))
                                .or_default();
                        } else {
                            self.set_reg(dst, RegState::Unknown);
                        }
                    }
                    _ => {
                        // Other field shapes (sub-u64 ints,
                        // structs, unions, enums, arrays, floats,
                        // FuncProto) cannot become a pointer by
                        // load alone. Drop dst.
                        self.set_reg(dst, RegState::Unknown);
                    }
                }
            } else {
                self.set_reg(dst, RegState::Unknown);
            }
            return;
        }
        match self.regs[src] {
            RegState::LoadedU64Field {
                source_struct_id,
                field_offset,
            } => {
                // The interesting case: the loaded u64 is being used
                // as a pointer base. Record the access and mark dst
                // Unknown -- we don't know the resolved target yet
                // (that's the matching phase's job).
                let target_off = match field_byte_offset(off) {
                    Some(o) => o,
                    None => {
                        self.set_reg(dst, RegState::Unknown);
                        return;
                    }
                };
                if let Some(sz) = size_bytes {
                    self.patterns
                        .entry((source_struct_id, field_offset))
                        .or_default()
                        .insert(Access {
                            offset: target_off,
                            size: sz,
                        });
                }
                self.set_reg(dst, RegState::Unknown);
            }
            RegState::ArenaU64FromAlloc { source, .. } => {
                // LDX through an arena pointer reads payload bytes
                // out of an allocator slot. When the value register
                // carries the source slot identity (set at the
                // alias-tracking LDX site for slots already in
                // [`Self::arena_stx_findings`]), record the
                // `(target_offset, target_size)` access against the
                // source slot's [`Self::patterns`] entry so shape
                // inference at finalize can intersect access patterns
                // across multiple dereferences and overwrite the
                // STX-flow's deferred-resolve sentinel
                // (`target_type_id == 0`) with a concrete BTF id when
                // unique-shape resolution succeeds.
                //
                // No source means the value came from an
                // allocator-return seed (BPF_PSEUDO_CALL with
                // SubprogReturn, kfunc allowlist hit) or
                // bridge/caller-arg propagation — the slot identity
                // is already recorded directly in
                // [`Self::arena_stx_findings`] by the producing site,
                // and there is no in-frame source slot to attribute
                // this dereference to. Drop the access; the renderer's
                // [`super::btf_render::MemReader::resolve_arena_type`]
                // bridge resolves the payload type at chase time for
                // those slots.
                if let Some((source_struct_id, field_offset)) = source {
                    let target_off = match field_byte_offset(off) {
                        Some(o) => o,
                        None => {
                            self.set_reg(dst, RegState::Unknown);
                            return;
                        }
                    };
                    if let Some(sz) = size_bytes {
                        self.patterns
                            .entry((source_struct_id, field_offset))
                            .or_default()
                            .insert(Access {
                                offset: target_off,
                                size: sz,
                            });
                    }
                }
                self.set_reg(dst, RegState::Unknown);
            }
            RegState::Unknown | RegState::FrameAddr { .. } => {
                self.set_reg(dst, RegState::Unknown);
            }
            RegState::Pointer { .. } | RegState::DatasecPointer { .. } => unreachable!(),
        }
    }

    /// `STX [r_dst_base + off] = r_src_value`.
    ///
    /// Three roles:
    /// 1. Stack spill — `dst == r10`: save src's RegState in
    ///    `stack_slots[off]`. Sub-DW or non-negative-off stores
    ///    invalidate the slot.
    /// 2. Kptr finding — when both base and value registers are
    ///    typed (Pointer{P} for the base, Pointer{T} for the
    ///    value), and the BTF declares the field of P at the store
    ///    offset as a plain `u64` of width 8, record (P, off) -> T
    ///    in the kptr map. The BTF gate prevents writing to a
    ///    pre-typed Ptr field (the kernel-driver case where BTF
    ///    already knows the target).
    /// 3. Arena STX finding — when the base is typed
    ///    `Pointer{P}` / `DatasecPointer` and the value register
    ///    is [`RegState::ArenaU64FromAlloc`] (allocator-return
    ///    seed or alias-tracked from a previously-arena-tagged
    ///    slot), and the BTF declares the field at the store
    ///    offset as a plain `u64`, record `(P, off)` in
    ///    [`Self::arena_stx_findings`]. The slot now holds an
    ///    arena pointer, even though BTF declared it `u64` — the
    ///    renderer's [`MemReader::resolve_arena_type`] bridge
    ///    resolves the payload type at chase time.
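    ///
    /// A sketch of the three roles in BPF-like pseudo-asm (register
    /// numbers and offsets are illustrative only):
    ///
    /// ```text
    /// *(u64 *)(r10 - 8) = r6    // role 1: spill — stack_slots[-8] = regs[6]
    /// *(u64 *)(r7 + 16) = r8    // role 2: r7 = Pointer{P}, r8 = Pointer{T},
    ///                           //         BTF says P's field at 16 is u64
    ///                           //         → kptr finding (P, 16) -> T
    /// *(u64 *)(r7 + 24) = r0    // role 3: r0 = ArenaU64FromAlloc
    ///                           //         → arena finding (P, 24)
    /// ```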
    fn handle_stx(&mut self, dst: usize, src: usize, size: u8, off: i16) {
        if dst >= self.regs.len() || src >= self.regs.len() {
            return;
        }
        // Spill path runs first — even for Unknown source values,
        // a store through r10 invalidates the slot (the slot now
        // holds an Unknown value rather than its prior typed
        // content). Without invalidation a subsequent reload could
        // resurrect a stale Pointer state and produce a false
        // positive.
        if dst == BPF_REG_R10 {
            if size != BPF_SIZE_DW || off >= 0 {
                // Sub-DW or out-of-spec store: invalidate any
                // existing slot so a later reload can't pick up
                // stale state. The truncating write to a slot that
                // formerly held a typed pointer cannot preserve
                // the pointer.
                self.stack_slots.remove(&off);
                return;
            }
            // Save the source register's state verbatim. Unknown
            // source means an Unknown saved state, which on reload
            // gives an Unknown register. Pointer / LoadedU64Field
            // round-trip preserved.
            self.stack_slots.insert(off, self.regs[src]);
            return;
        }

        // Kptr path: only DW (8-byte) stores can persist a 64-bit
        // pointer. Sub-DW stores are not pointer-valued.
        if size != BPF_SIZE_DW {
            return;
        }
        // Compute the (parent_btf_id, field_byte_offset) for the
        // store target. Two RegState variants reach the kptr path:
        // `Pointer{struct}` (the kernel-driver case where the
        // parent is a struct or union) and
        // `DatasecPointer{datasec, base}` (the BSS / data global
        // case where the parent is a `BTF_KIND_DATASEC` and the
        // base offset baked into the LD_IMM64 contributes to the
        // effective field offset). In both cases the field offset
        // is `(base) + insn.off`; the parent BTF id selects which
        // layout to consult via `struct_member_at`.
        let (parent_btf_id, base_offset) = match self.regs[dst] {
            RegState::Pointer {
                struct_type_id: pid,
            } => (pid, 0u32),
            RegState::DatasecPointer {
                datasec_type_id,
                base_offset,
            } => (datasec_type_id, base_offset),
            _ => return,
        };
        // Three value-side variants matter here: `Pointer{T}`
        // (kernel kptr STX into a u64 field), `ArenaU64FromAlloc`
        // (arena pointer from allocator return, or alias-tracked
        // from a previously-arena-tagged slot), and `Unknown`
        // (which withdraws a prior addr_space_cast confirmation
        // for the slot). Anything else carries no signal.
        let value_state = match self.regs[src] {
            RegState::Pointer {
                struct_type_id: tid,
            } => StxValueKind::Kptr { target: tid },
            RegState::ArenaU64FromAlloc { alloc_size, .. } => StxValueKind::Arena { alloc_size },
            RegState::Unknown => StxValueKind::Unknown,
            _ => return,
        };
        let Some(insn_off) = field_byte_offset(off as i32) else {
            return;
        };
        let Some(field_off) = base_offset.checked_add(insn_off) else {
            // Pathological large base_offset + insn_off overflow:
            // drop conservatively. False negative is the safe
            // direction; a real BPF program never legitimately
            // produces an offset past `u32::MAX`.
            return;
        };
        // BTF gate: the destination field at this offset must be a
        // plain `u64`. A typed Ptr field is the BTF-already-typed
        // case the renderer handles natively; recording a cast
        // there would duplicate work. A non-u64 field (sub-u64
        // int, struct, array) is not a pointer slot at all — the
        // store is undefined behavior we drop conservatively.
        let Some(member) = struct_member_at(self.btf, parent_btf_id, field_off) else {
            return;
        };
        let member_type_id = member.member_type_id();
        let Some(terminal) = super::btf_render::peel_modifiers(self.btf, member_type_id) else {
            return;
        };
        let Type::Int(int) = terminal else { return };
        if int.size() != 8 || int.is_signed() || int.is_bool() || int.is_char() {
            return;
        }
        // The Datasec path stores the variable's start offset
        // (matching `MemberAt::Datasec::var_byte_offset`) as the
        // canonical key, NOT the queried offset. For a plain u64
        // global the two are equal; for a struct global the
        // queried offset can land mid-struct but the cast finding
        // is keyed on the variable's start so the renderer's
        // `(parent, member_offset)` lookup matches the variable
        // boundary. Lookups through the BSS-DATASEC parent then
        // surface the per-variable kptr / arena finding just like
        // a struct member would.
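        //
        // e.g. (illustrative): a global whose VarSecinfo starts at
        // byte 0x40 of the .bss datasec keys its finding at
        // (bss_datasec_id, 0x40), the same offset the renderer's
        // per-variable lookup uses.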
        let (canonical_parent, canonical_field_off) = match &member {
            MemberAt::Struct {
                resolved_parent_type_id,
                resolved_member_offset,
                ..
            } => (*resolved_parent_type_id, *resolved_member_offset),
            MemberAt::Datasec {
                var_byte_offset, ..
            } => (parent_btf_id, *var_byte_offset),
        };
        self.note_type_id(canonical_parent);
        let key = (canonical_parent, canonical_field_off);
        match value_state {
            StxValueKind::Kptr { target } => {
                // Self-store is almost always a structural error
                // (the analyzer concluded `parent == target`
                // because of ambiguous pointer aliasing); reject
                // to keep the false-positive bar high. The Datasec
                // parent path cannot self-store: a datasec id is
                // never the target struct id of a kptr (kptrs
                // target slab structs like task_struct), so this
                // gate fires only on the `Pointer{struct}` case in
                // practice. The unconditional check is the
                // simplest safe form.
                if canonical_parent == target {
                    return;
                }
                self.note_type_id(target);
                match self.kptr_findings.get(&key).copied() {
                    None => {
                        self.kptr_findings.insert(key, KptrEntry::Single(target));
                    }
                    Some(KptrEntry::Single(prev)) if prev == target => {
                        // Same target observed again — keep Single.
                    }
                    Some(_) => {
                        // Different target previously observed at
                        // the same slot, or already collapsed to
                        // Conflicting. The slot is ambiguous;
                        // drop it on finalize.
                        self.kptr_findings.insert(key, KptrEntry::Conflicting);
                    }
                }
            }
            StxValueKind::Arena { alloc_size } => {
                // Allocator-return / alias-tracked arena pointer
                // stored into a u64 slot. Record the slot in
                // [`Self::arena_stx_findings`] so finalize emits
                // an Arena cast hit. The target type id is
                // unresolved at analysis time — the renderer's
                // [`super::btf_render::MemReader::resolve_arena_type`]
                // bridge supplies the real payload BTF id at chase
                // time, so the analyzer just records that the slot
                // saw an arena STX via [`ArenaStxEntry::Pending`].
                //
                // Two STX writes to the same slot of the same shape
                // (arena STX after another arena STX) are not a
                // conflict — both observations agree the slot
                // holds an arena pointer. The `Some(Pending)` arm
                // is the dedup no-op.
                //
                // A prior `Pointer{T}` STX into the same slot has
                // already populated `kptr_findings`; the conflict
                // detector in [`Self::finalize`] cross-references
                // both maps and drops the slot from BOTH sides so
                // the resulting CastMap excludes it. That cross-
                // path conflict is detected at finalize, NOT here:
                // `arena_stx_findings` and `kptr_findings` are
                // disjoint maps and this arm only sees prior arena
                // STX state.
                //
                // `alloc_size` rides into [`Self::arena_alloc_size_index`]
                // alongside the finding. When two STX writes record
                // disagreeing captured sizes (`Some(n)` then `Some(m)`
                // with n != m), the index entry collapses to `None` —
                // the slot is ambiguous on the size and the renderer
                // must fall back to the bridge or skip cleanly. A
                // repeated STX with the SAME `Some(n)` is a no-op, a
                // size-less `None` never overwrites a previously
                // captured size, and a `Some(n)` arriving after a
                // size-less entry upgrades it: the size is monotonic
                // evidence, so `Some(n)` is strictly more informative
                // than `None`.
                match self.arena_stx_findings.get(&key).copied() {
                    None => {
                        self.arena_stx_findings.insert(key, ArenaStxEntry::Pending);
                        self.arena_alloc_size_index.insert(key, alloc_size);
                    }
                    Some(ArenaStxEntry::Pending) => {
                        // Same arena observation — dedup the
                        // ArenaStxEntry but reconcile the captured
                        // alloc_size:
                        //   - `Some(n)` after absent → record
                        //     `Some(n)` (the alloc-size index may
                        //     not have an entry yet on cross-pass
                        //     re-runs; treat as fresh insert).
                        //   - `Some(n)` after `Some(n)` → no-op.
                        //   - `Some(n)` after `Some(m)` with n != m
                        //     → collapse to `None` (ambiguous).
                        //   - `Some(n)` after a size-less entry
                        //     (`None` already in the index) →
                        //     upgrade to `Some(n)`; a captured size
                        //     is strictly more informative than its
                        //     absence.
                        //   - `None` after anything → keep prior
                        //     entry; absence-of-capture does not
                        //     overwrite a captured size, but DOES
                        //     create an entry when none existed
                        //     (so the index reflects "this slot
                        //     saw an arena STX, no captured size").
                        match (self.arena_alloc_size_index.get(&key).copied(), alloc_size) {
                            (None, _) => {
                                self.arena_alloc_size_index.insert(key, alloc_size);
                            }
                            (Some(None), Some(_)) => {
                                // Prior pass recorded absent-of-
                                // capture; this pass brings new
                                // evidence (e.g. LoadedU64Field
                                // inheritance resolved on a later
                                // fixpoint iteration). Upgrade.
                                self.arena_alloc_size_index.insert(key, alloc_size);
                            }
                            (Some(None), None) => {
                                // Both absent — no-op.
                            }
                            (Some(Some(_)), None) => {
                                // Captured size survives an absent
                                // observation — no overwrite.
                            }
                            (Some(Some(prev)), Some(new)) if prev == new => {
                                // Identical capture — no-op.
                            }
                            (Some(Some(_)), Some(_)) => {
                                // Captured sizes disagree — collapse.
                                self.arena_alloc_size_index.insert(key, None);
                            }
                        }
                    }
                    Some(ArenaStxEntry::Conflicting) => {
                        // Unreachable: every insertion site for
                        // `arena_stx_findings` (this arm and
                        // [`Self::bridge_map_value_spill`]) only
                        // ever inserts `ArenaStxEntry::Pending`. The
                        // `Conflicting` variant exists for symmetry
                        // with [`KptrEntry::Conflicting`] and as a
                        // defensive landing pad if a future code
                        // path adds disagreement detection inside
                        // the arena STX flow. Until then, reaching
                        // this arm signals a logic error in the
                        // analyzer's insertion discipline;
                        // panicking surfaces it instead of silently
                        // re-inserting and masking the bug.
                        unreachable!(
                            "arena_stx_findings cannot hold Conflicting: \
                             every insertion site only inserts Pending"
                        );
                    }
                }
            }
            StxValueKind::Unknown => {
                // May-analysis: once a slot is tagged arena by any
                // path, the [`Self::arena_stx_findings`] entry
                // persists. A non-arena store on a different path
                // does not invalidate it — the renderer's
                // resolve_arena_type bridge checks the runtime VA,
                // which is the ground truth. Only the
                // addr_space_cast confirmation is withdrawn: an
                // untyped DW store into the slot means the
                // confirmed-arena evidence no longer holds on
                // every path.
                self.arena_confirmed.remove(&key);
            }
        }
    }

    /// `BPF_STX | BPF_<size> | BPF_ATOMIC` (mode == 0xc0).
    ///
    /// Atomic memory ops carry the specific operation in `imm` (see
    /// linux uapi `bpf.h`). The dispatch is driven by the
    /// `BPF_FETCH` (0x01) bit in `imm`:
    ///
    /// FETCH variants — write a register with the prior memory
    /// value, which the analyzer cannot type:
    /// - `BPF_CMPXCHG = 0xf0 | BPF_FETCH`: `r0 = atomic_cmpxchg(...)`.
    ///   R0 is overwritten with the old memory value — drop R0.
    /// - `BPF_XCHG = 0xe0 | BPF_FETCH`: `src_reg = atomic_xchg(...)`.
    /// - Arithmetic-FETCH (BPF_ADD/AND/OR/XOR with BPF_FETCH set):
    ///   `src_reg` ends up with the prior arithmetic value.
    ///
    /// All FETCH variants drop `src_reg` to Unknown (false negative
    /// on CMPXCHG, which only writes R0 — see `handle_atomic` body).
    ///
    /// Non-FETCH variants (plain BPF_ADD/AND/OR/XOR) read-modify-
    /// write memory but leave every register intact.
    ///
    /// `BPF_LOAD_ACQ = 0x100`: `dst = smp_load_acquire((size *)(src + off))`.
    /// dst receives a memory value the analyzer cannot type — drop
    /// dst. `BPF_STORE_REL = 0x110`:
    /// `smp_store_release((size *)(dst + off), src)` — no register
    /// effect; dst is the address base, src is the value. See linux
    /// include/linux/filter.h for the authoritative semantics.
    ///
    /// Stack-slot invalidation: when `dst == BPF_REG_R10` the atomic
    /// targets a stack slot. Any prior typed RegState parked in
    /// `stack_slots[off]` cannot survive an atomic write — drop the
    /// slot before per-register clobber logic so a later LDX through
    /// r10 cannot resurrect a stale Pointer state. Mirrors the
    /// invalidation in [`Self::handle_stx`] for plain stores and the
    /// `BPF_CLASS_ST` arm in `step()` for immediate stores.
    ///
    /// No kptr finding is recorded on any atomic store. The kptr
    /// path requires the store to publish the value verbatim into
    /// the slot (the kernel's `bpf_kptr_xchg` helper is the proper
    /// kptr-write path); atomic XCHG/CMPXCHG semantics differ
    /// enough that attributing `Pointer{T}` source to the slot is
    /// unsafe.
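    ///
    /// Quick reference of the `imm` encodings dispatched below (values
    /// per the upstream uapi; the right column is this analyzer's
    /// effect, not the kernel's):
    ///
    /// ```text
    /// 0x00  BPF_ADD              no register written    → no clobber
    /// 0x01  BPF_ADD | BPF_FETCH  src = fetch_add(..)    → drop src
    /// 0xe1  BPF_XCHG             src = xchg(..)         → drop src
    /// 0xf1  BPF_CMPXCHG          r0 = cmpxchg(..)       → drop r0 (and src,
    ///                                                      conservatively)
    /// 0x100 BPF_LOAD_ACQ         dst = load_acquire(..) → drop dst
    /// 0x110 BPF_STORE_REL        no register written    → no clobber
    /// ```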
    fn handle_atomic(&mut self, dst: usize, src: usize, imm: i32, off: i16) {
        // dst / src already bounds-checked at the top of step().
        // Avoid panicking even if a future caller forgets.
        if dst >= self.regs.len() || src >= self.regs.len() {
            return;
        }

        // Stack-slot invalidation: an atomic STORE through r10
        // overwrites the slot's prior content. LOAD_ACQ is the lone
        // exception: dst is the value receiver, NOT the address
        // base (`*src + off` is the address), so dst==r10 on
        // LOAD_ACQ does not write through to the stack — it merely
        // attempts to load INTO r10 (which the verifier rejects)
        // and produces no slot mutation. Skip invalidation in that
        // case so an unrelated stack slot at the same `off` keeps
        // its tracked state.
        if dst == BPF_REG_R10 && imm != BPF_LOAD_ACQ_IMM {
            self.stack_slots.remove(&off);
        }

        // BPF_LOAD_ACQ (0x100): dst register receives memory value;
        // src is the address base. Clobber dst to Unknown — the
        // analyzer does not type loaded values via atomic mode.
        // BPF_STORE_REL (0x110): dst is the address base, src is the
        // value being stored. Stack invalidation above already
        // handles the spill case; no per-register clobber here.
        if imm == BPF_LOAD_ACQ_IMM {
            self.set_reg(dst, RegState::Unknown);
            return;
        }
        if imm == BPF_STORE_REL_IMM {
            return;
        }

        let top = imm & 0xf0;
        let has_fetch = (imm & BPF_FETCH) != 0;

        // BPF_CMPXCHG = 0xf0 | BPF_FETCH = 0xf1. R0 receives the old
        // memory value regardless of whether the compare succeeded.
        if top == BPF_CMPXCHG_TOP && has_fetch {
            self.set_reg(0, RegState::Unknown);
        }
        // BPF_XCHG = 0xe0 | BPF_FETCH = 0xe1. src_reg receives the
        // old memory value. Same direction for any other FETCH
        // variant (BPF_ADD/AND/OR/XOR with BPF_FETCH bit set):
        // src_reg ends up holding a value the analyzer cannot
        // type, so drop it to Unknown.
        //
        // CMPXCHG (0xf1) only writes R0; we conservatively clobber
        // src on all FETCH variants including CMPXCHG — false
        // negative, acceptable.
        if has_fetch {
            self.set_reg(src, RegState::Unknown);
        }
        // Non-fetch atomic ops (plain BPF_ADD/AND/OR/XOR) do not
        // overwrite any register — leave RegState alone.
    }

    /// `BPF_CALL` with `src_reg == BPF_PSEUDO_KFUNC_CALL`.
    ///
    /// `imm` is the BTF id of a `Type::Func` (peeled one level to
    /// its FuncProto) or a `Type::FuncProto` directly. Peel the
    /// return type through Ptr -> Struct and set R0 if the chain
    /// succeeds. Negative or zero `imm` indicates either a
    /// pre-relocation kfunc placeholder (real on-disk `.bpf.o`
    /// files typically have `imm = -1` for kfunc calls before
    /// libbpf resolves the kernel BTF id) or a non-kfunc call;
    /// drop silently in both cases.
    ///
    /// Two distinct R0 typings happen here:
    ///
    /// 1. **Typed-pointer return** (`Ptr -> Struct/Union`): the
    ///    kfunc returns a typed kernel pointer (e.g.
    ///    `bpf_task_acquire`, `bpf_cpumask_first`). R0 becomes
    ///    [`RegState::Pointer`] so the next STX of R0 into a u64
    ///    slot of a typed parent records a kernel kptr finding.
    ///
    /// 2. **Arena-allocator return** (`Ptr -> Void`, allowlisted
    ///    name): the kfunc allocates arena memory and returns a
    ///    raw `void *` whose runtime value is an arena VA (e.g.
    ///    `bpf_arena_alloc_pages`). The Ptr->Void return is
    ///    structurally indistinguishable from a typed pointer at
    ///    the BTF level — neither side carries a `__arena`
    ///    qualifier in the kernel's program-BTF representation —
    ///    so the disambiguator is the kfunc's name. R0 becomes
    ///    [`RegState::ArenaU64FromAlloc`] so the next STX of R0
    ///    into a u64 slot of a typed parent records an arena
    ///    finding via `arena_stx_findings`. Arms 1 and 2 are
    ///    mutually exclusive: arm 1 only fires when the return
    ///    peels to a Struct/Union; arm 2 only fires when the
    ///    return peels to Void AND the name is on the allowlist.
    ///    A kfunc whose name is on the allowlist but whose
    ///    return is NOT Ptr->Void (BTF mismatch — drift between
    ///    kernel source and analyzer's allowlist) drops to no
    ///    typing rather than misclassifying R0.
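    ///
    /// Hedged examples (kfunc names are illustrative; the allowlist
    /// is the source of truth):
    ///
    /// ```text
    /// struct task_struct *bpf_task_acquire(..)  → arm 1: R0 = Pointer{task_struct}
    /// void *bpf_arena_alloc_pages(..)           → arm 2: R0 = ArenaU64FromAlloc
    /// u64 some_scalar_kfunc(..)                 → neither: R0 stays Unknown
    /// ```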
    fn handle_kfunc_call(&mut self, imm: i32) {
        if imm <= 0 {
            return;
        }
        let func_btf_id = imm as u32;
        // Resolve the kfunc's FuncProto AND retain a handle on the
        // `Type::Func` so we can resolve its name for the
        // allocator-allowlist arm. The two-arm dispatch needs both
        // pieces of evidence (return-type shape + name), so the
        // resolution is unified here rather than running twice.
        let (proto, func_name) = match self.btf.resolve_type_by_id(func_btf_id) {
            Ok(Type::Func(f)) => match f.get_type_id() {
                Ok(pid) => match self.btf.resolve_type_by_id(pid) {
                    Ok(Type::FuncProto(fp)) => {
                        let name = self.btf.resolve_name(&f).ok();
                        (fp, name)
                    }
                    _ => return,
                },
                Err(_) => return,
            },
            Ok(Type::FuncProto(fp)) => (fp, None),
            _ => return,
        };
        let ret_id = proto.return_type_id();
        if ret_id == 0 {
            // Void return at the FuncProto level (return_type_id
            // == 0 marks `void` in BTF). R0 stays Unknown — no
            // arena allocator declares this shape (allocators
            // return `void *`, not `void`).
            return;
        }
        // Arm 1: typed-pointer return.
        if let Some(sid) = super::bpf_map::resolve_to_struct_id(self.btf, ret_id) {
            self.regs[0] = RegState::Pointer {
                struct_type_id: sid,
            };
            self.note_type_id(sid);
            return;
        }
        // Arm 2: arena-allocator return. The allowlist lookup
        // is gated on `Ptr -> Void` to keep the false-positive
        // bar high — a same-named kfunc whose return is NOT
        // Ptr->Void cannot have its R0 typed by this arm. This
        // protects against name collisions between a future
        // arena-returning kfunc and an unrelated kfunc that
        // happens to share a name.
        if return_peels_to_ptr_void(self.btf, ret_id)
            && let Some(name) = func_name.as_deref()
            && ARENA_ALLOC_KFUNC_NAMES.contains(&name)
        {
            // Kfunc allocators (e.g. `bpf_arena_alloc_pages`) carry no
            // captured `sizeof` at this analyzer site — the call's R1
            // is the page count, not a payload size, and the page-
            // granularity allocation has its own resolution path
            // (slab metadata or bridge entry) rather than the static-
            // alloc size-match fallback. `alloc_size: None` selects
            // the bridge or skips the chase rather than guessing.
            self.regs[0] = RegState::ArenaU64FromAlloc {
                source: None,
                alloc_size: None,
            };
            // F4 telemetry parity with the SubprogReturn arm:
            // count this as an applied allocator seed so the
            // finalize warn distinguishes "allocator was called
            // but no slot got tagged" from "no allocator was
            // ever called" identically across kfunc and subprog
            // paths.
            self.alloc_seeds_applied = self.alloc_seeds_applied.saturating_add(1);
            self.func_has_alloc = true;
        }
    }

    fn set_reg(&mut self, idx: usize, state: RegState) {
        // R10 is the read-only frame pointer per BPF ABI; the
        // verifier rejects programs that mutate it. Maintain the
        // invariant that regs[R10] stays Unknown so a later LDX/STX
        // through r10 cannot resurrect a stale typed-pointer state.
        if idx == BPF_REG_R10 {
            return;
        }
        if idx < self.regs.len() {
            self.regs[idx] = state;
        }
    }

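    /// `bpf_map_update_elem` bridge: the map's value struct was built
    /// on the stack (R3 was a [`RegState::FrameAddr`] at the call
    /// site) and is about to be copied into the map wholesale. Walk
    /// the value struct's plain-u64 members and, for each, consult
    /// the stack slot at `r3_base + member_off`:
    ///
    /// - when the function has applied an allocator-return seed
    ///   (`func_has_alloc`), the member's slot is treated as
    ///   potentially holding an arena pointer: record
    ///   `(value_struct_id, member_off)` in
    ///   [`Self::arena_stx_findings`] and reconcile any captured
    ///   `alloc_size` into [`Self::arena_alloc_size_index`];
    /// - otherwise, a slot holding a typed `Pointer{T}` records a
    ///   kptr finding with the same Single/Conflicting discipline
    ///   as [`Self::handle_stx`].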
    fn bridge_map_value_spill(&mut self, value_struct_id: u32, r3_base: i16) {
        let (t, _peeled_id) =
            match super::btf_render::peel_modifiers_with_id(self.btf, value_struct_id) {
                Some(v) => v,
                None => return,
            };
        let s = match t {
            Type::Struct(s) | Type::Union(s) => s,
            _ => return,
        };
        for m in &s.members {
            if matches!(m.bitfield_size(), Some(b) if b > 0) {
                continue;
            }
            let bit_off = m.bit_offset();
            if bit_off % 8 != 0 {
                continue;
            }
            let member_off = bit_off / 8;
            let Ok(member_tid) = m.get_type_id() else {
                continue;
            };
            let Some(terminal) = super::btf_render::peel_modifiers(self.btf, member_tid) else {
                continue;
            };
            let Type::Int(int) = terminal else { continue };
            if int.size() != 8 || int.is_signed() || int.is_bool() || int.is_char() {
                continue;
            }
            let Some(slot_off) = i16::try_from(member_off as i32)
                .ok()
                .and_then(|o| r3_base.checked_add(o))
            else {
                continue;
            };
            let ever_arena = self.func_has_alloc;
            let slot = match self.stack_slots.get(&slot_off).copied() {
                Some(s) => s,
                // Function-level "this function called an allocator"
                // heuristic: when a stack slot has no recorded state
                // but the function has seen an allocator-return seed,
                // assume the slot might hold an arena pointer. The
                // synthesized state has no captured `alloc_size` —
                // the bridge fires for a typed allocator's per-slot
                // header, and this heuristic is the fallback path
                // when the linear walk lost track. `None` keeps the
                // chase falling back to the bridge or skipping
                // cleanly.
                None if ever_arena => RegState::ArenaU64FromAlloc {
                    source: None,
                    alloc_size: None,
                },
                None => continue,
            };
            let key = (value_struct_id, member_off);
            match slot {
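                // The function-level allocator heuristic takes
                // precedence over the typed-pointer arm below: once
                // this function has applied an allocator-return
                // seed, every u64 member of the map value is treated
                // as a potential arena-pointer slot regardless of
                // what the linear walk tracked for it.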
                _ if ever_arena => {
                    self.note_type_id(value_struct_id);
                    let captured = match slot {
                        RegState::ArenaU64FromAlloc { alloc_size, .. } => alloc_size,
                        RegState::LoadedU64Field {
                            source_struct_id,
                            field_offset,
                        } => self
                            .arena_alloc_size_index
                            .get(&(source_struct_id, field_offset))
                            .copied()
                            .flatten(),
                        _ => None,
                    };
                    if let std::collections::btree_map::Entry::Vacant(e) =
                        self.arena_stx_findings.entry(key)
                    {
                        e.insert(ArenaStxEntry::Pending);
                        self.arena_alloc_size_index.insert(key, captured);
                        self.bridge_slot_origins.insert(slot_off, key);
                    } else {
                        match (self.arena_alloc_size_index.get(&key).copied(), captured) {
                            (None, _) => {
                                self.arena_alloc_size_index.insert(key, captured);
                            }
                            (Some(None), Some(_)) => {
                                self.arena_alloc_size_index.insert(key, captured);
                            }
                            (Some(Some(prev)), Some(new)) if prev != new => {
                                self.arena_alloc_size_index.insert(key, None);
                            }
                            _ => {}
                        }
                    }
                }
                RegState::Pointer {
                    struct_type_id: target,
                } => {
                    if value_struct_id == target {
                        continue;
                    }
                    self.note_type_id(value_struct_id);
                    self.note_type_id(target);
                    match self.kptr_findings.get(&key).copied() {
                        None => {
                            self.kptr_findings.insert(key, KptrEntry::Single(target));
                        }
                        Some(KptrEntry::Single(prev)) if prev == target => {}
                        Some(_) => {
                            self.kptr_findings.insert(key, KptrEntry::Conflicting);
                        }
                    }
                }
                _ => {}
            }
        }
    }

    fn note_type_id(&mut self, id: u32) {
        if id > self.max_seen_type_id {
            self.max_seen_type_id = id;
        }
    }

    fn finalize(self) -> CastMap {
        let mut out = CastMap::new();
        let max_id = self
            .max_seen_type_id
            .saturating_add(CANDIDATE_SEARCH_SLACK)
            .min(super::sdt_alloc::MAX_BTF_ID_PROBE);
        // F15 mitigation: warn when the candidate-search slack
        // capped against the hard ceiling. A scheduler whose largest
        // touched id is close to MAX_BTF_ID_PROBE means
        // [`build_layout_index`] cannot probe every type the BTF
        // exposes — shape-inference candidates above the cap are
        // invisible. Surface this as a `warn!` so a future BTF that
        // genuinely exceeds the ceiling shows up rather than silently
        // missing candidates.
        if self.max_seen_type_id.saturating_add(CANDIDATE_SEARCH_SLACK)
            > super::sdt_alloc::MAX_BTF_ID_PROBE
        {
            tracing::warn!(
                max_seen_type_id = self.max_seen_type_id,
                slack = CANDIDATE_SEARCH_SLACK,
                cap = super::sdt_alloc::MAX_BTF_ID_PROBE,
                "cast_analysis: candidate-search slack capped at MAX_BTF_ID_PROBE; \
                 shape-inference candidates above the cap are invisible"
            );
        }

        // Pre-build (offset, size) -> { type_id } so each pattern
        // does not re-walk the entire BTF id space. The walk stops
        // at the first sustained run of unresolved ids -- BTF id
        // tables are dense in practice but tolerate small gaps.
        let layout = build_layout_index(self.btf, max_id);

        // Arena/kptr conflict drop set: any (source, offset) slot
        // observed by BOTH an arena path (`self.patterns` —
        // the slot was loaded as a u64 then dereferenced as a
        // pointer base; OR `self.arena_stx_findings` — an
        // [`RegState::ArenaU64FromAlloc`] value was stored into
        // the slot) AND the kernel STX path (`self.kptr_findings`
        // — a typed `Pointer{T}` was stored into the slot) is
        // ambiguous. The same byte cannot simultaneously hold an
        // arena VA (deref via arena reader) and a kernel VA (deref
        // via slab/vmalloc reader); seeing both is evidence the
        // analyzer's flow-insensitive register tracking confused
        // disjoint code paths against the same slot. False positive
        // is unacceptable, so drop both observations and let the
        // renderer fall back to the raw u64 path. False negative
        // is acceptable. Note that `self.patterns` includes keys
        // with empty access sets (the slot was loaded but never
        // dereferenced); those carry no signal either way and are
        // not treated as arena evidence here.
        // Arena+Kptr agreement: when a slot has BOTH an arena finding
        // AND a kptr finding, AND the kptr's value was stored through
        // an addr_space_cast (the slot is arena_confirmed), the two
        // observations AGREE — the pointer is an arena VA with a known
        // type. Merge: emit an Arena CastHit with the kptr's
        // target_type_id. Only drop when the slot is NOT
        // arena_confirmed (genuine kernel/arena ambiguity).
        let mut arena_kptr_merged: BTreeMap<(u32, u32), u32> = BTreeMap::new();
        let conflicting: BTreeSet<(u32, u32)> = self
            .patterns
            .iter()
            .filter(|(_, accesses)| !accesses.is_empty())
            .map(|(k, _)| *k)
            .chain(self.arena_confirmed.iter().copied())
            .chain(self.arena_stx_findings.keys().copied())
            .filter(|k| self.kptr_findings.contains_key(k))
            .filter(|k| {
                // Merge when the kptr's target is Fwd in the
                // entry BTF — a scheduler-specific arena struct
                // whose body was dropped by split-BTF dedup. A
                // kernel kptr would have a full Struct body in
                // the vmlinux base, not a Fwd.
                if let Some(KptrEntry::Single(tid)) = self.kptr_findings.get(k)
                    && let Ok(ty) = self.btf.resolve_type_by_id(*tid)
                    && matches!(ty, Type::Fwd(_))
                {
                    arena_kptr_merged.insert(*k, *tid);
                    return false;
                }
                true // genuine conflict
            })
            .collect();
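
        // Worked sketch (hypothetical ids): a slot (12, 0x40) seen by
        // both an arena STX and a kptr STX whose target id 99
        // resolves to a Fwd in the entry BTF lands in
        // `arena_kptr_merged` as `(12, 0x40) -> 99`; the same slot
        // with a kptr target that resolves to a full Struct body
        // stays in `conflicting`, and both observations drop below.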

        // Record arena-STX hits before the shape-inference loop
        // below. Both paths produce `addr_space: AddrSpace::Arena`
        // for the same slot. The STX-flow loop runs the same shape
        // intersection inline and emits a concrete `target_type_id`
        // when exactly one candidate survives, falling back to `0`
        // (deferred resolve via the `MemReader::resolve_arena_type`
        // bridge) otherwise. The shape-inference loop then skips any
        // key already present in `out`, so each slot is emitted
        // exactly once: the bridge covers slots without a
        // shape-derived id, and a concrete id is used whenever the
        // intersection collapses to one candidate.

        // Arena STX-flow path: directly observed STX of an
        // [`RegState::ArenaU64FromAlloc`] value into a u64 slot.
        // Emit with the shape-inferred `target_type_id` when the
        // access-pattern intersection collapses to a single
        // candidate, otherwise with `target_type_id == 0` — the
        // renderer's [`MemReader::resolve_arena_type`] bridge then
        // resolves the payload BTF id at chase time from the live
        // arena snapshot (cross-BTF Fwd resolution). Conflicting
        // slots (also seen
        // as kptr STX) drop here AND on the kptr side.
        //
        // # When the deferred resolve succeeds vs fails at chase
        // time
        //
        // The bridge is backed by
        // [`super::dump::render_map::ArenaSlotIndex`], which the
        // sdt_alloc pre-pass populates by walking
        // [`super::sdt_alloc::SdtAllocatorSnapshot`] for every
        // **per-instance** allocator (`scx_alloc_internal` and
        // friends). The bridge therefore RESOLVES at chase time only
        // when the chased pointer's runtime value falls inside an
        // sdt_alloc slot's `[slot_start, slot_start + elem_size)`
        // range AND lands at either the slot start (header_skip ==
        // header_size) or payload start (header_skip == 0).
        //
        // The bridge does NOT cover bump-allocator allocations from
        // `scx_static_alloc_internal` — that allocator has no
        // per-allocation header and produces a flat arena region
        // with no per-slot metadata the pre-pass can index. A slot
        // whose arena VA was produced by `scx_static_alloc_internal`
        // and whose target BTF type is unique-shape-inferable at
        // analysis time will resolve via the shape-inference
        // intersection (concrete `target_type_id != 0`); a slot whose
        // shape is ambiguous (multiple BTF structs match the access
        // pattern) and whose VA is from `scx_static_alloc_internal`
        // will fall through with `target_type_id == 0` and the
        // bridge will return `None` at chase time, so the chase
        // skips with a clear "no entry for 0x{val:x}" reason.
        // This is the "no invalid data made" contract: ambiguous
        // shape + no per-slot index = fail-closed, no chase, no
        // wrong render.
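        //
        // Sketch with hypothetical numbers: an sdt_alloc slot at
        // slot_start 0x1_0000_0100 with elem_size 0x40 and header
        // size 0x10 lets the bridge resolve a chased value of
        // 0x1_0000_0100 (slot start) or 0x1_0000_0110 (payload
        // start); a value of 0x1_0000_0120 lands mid-payload, the
        // bridge returns `None`, and the chase skips.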
        for (key, entry) in &self.arena_stx_findings {
            // Filter out `Conflicting` entries defensively: today no
            // insertion path produces them (`handle_stx` only inserts
            // `Pending` and `unreachable!()`s on a `Conflicting`
            // overwrite), but a future enrichment of the arena STX
            // flow could legitimately record disagreement; this gate
            // keeps the drop semantics in one place.
            if !matches!(entry, ArenaStxEntry::Pending) {
                continue;
            }
            if conflicting.contains(key) {
                continue;
            }
            let inferred_target = self.patterns.get(key).and_then(|accesses| {
                if accesses.is_empty() {
                    return None;
                }
                let mut iter = accesses.iter();
                let first = iter.next()?;
                let empty = HashSet::new();
                let mut candidates: HashSet<u32> = layout
                    .get(&(first.offset, first.size))
                    .cloned()
                    .unwrap_or_default();
                for acc in iter {
                    let next = layout.get(&(acc.offset, acc.size)).unwrap_or(&empty);
                    candidates.retain(|c| next.contains(c));
                    if candidates.is_empty() {
                        break;
                    }
                }
                candidates.remove(&key.0);
                if candidates.len() == 1 {
                    candidates.into_iter().next()
                } else {
                    None
                }
            });
            let target_type_id = inferred_target.unwrap_or(0);
            // Pull the captured `sizeof` argument from the per-slot
            // index. `Some(Some(n))` is a clean capture; `Some(None)`
            // is "slot saw an arena STX but the size was either not
            // captured (kfunc / scx_alloc_internal) or got collapsed
            // due to disagreement"; absent means no STX from this
            // path recorded a size — both flatten to `None`. The
            // chase path uses the size only when the bridge fails,
            // so a missing or ambiguous size silently selects the
            // bridge or skips with a clear reason — never misrenders.
            let alloc_size = self.arena_alloc_size_index.get(key).copied().flatten();
            tracing::debug!(
                parent = key.0,
                offset = key.1,
                target = target_type_id,
                alloc_size = ?alloc_size,
                "cast_analysis: arena STX-flow hit emitted"
            );
            out.insert(
                *key,
                CastHit {
                    target_type_id,
                    addr_space: AddrSpace::Arena,
                    alloc_size,
                },
            );
        }

        // Arena+Kptr merged: emit Arena CastHits with the kptr's
        // resolved target_type_id for slots where both arena and kptr
        // observations agreed (addr_space_cast confirmed arena).
        for (key, kptr_target) in &arena_kptr_merged {
            let alloc_size = self.arena_alloc_size_index.get(key).copied().flatten();
            tracing::debug!(
                parent = key.0,
                offset = key.1,
                target = kptr_target,
                alloc_size = ?alloc_size,
                "cast_analysis: arena+kptr merged hit emitted"
            );
            out.insert(
                *key,
                CastHit {
                    target_type_id: *kptr_target,
                    addr_space: AddrSpace::Arena,
                    alloc_size,
                },
            );
        }

        // Arena pointer path (shape inference): BTF-shape-inferred
        // targets. Tagged as AddrSpace::Arena because the source
        // u64 field is itself dereferenced and its target struct is
        // recovered by intersecting struct shapes across the
        // observed access pattern.
        //
        // F1 mitigation: require direct evidence the slot held an
        // arena VA before emitting a shape-inference hit. The 4 GiB
        // arena window catches any 33-bit value as "in arena" at
        // chase time, so a slot that just happens to hold a
        // 33-bit-shaped counter could be mis-rendered as an arena
        // pointer. Direct evidence comes from EITHER an
        // observed `BPF_ADDR_SPACE_CAST` on a value loaded from the
        // slot (`self.arena_confirmed`) OR an observed STX of an
        // allocator-tagged value into the slot
        // (`self.arena_stx_findings` — see the STX-flow path above).
        // Slots with neither observation drop here; an operator can
        // re-enable inference for a specific slot by adding either
        // a `bpf_addr_space_cast` site or the STX-flow tag in the
        // scheduler source.
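        //
        // Sketch of the failure F1 guards against: a u64 stat
        // counter that happens to hold a 33-bit value such as
        // 0x1_0000_0042 looks like an in-window arena VA at chase
        // time; without an addr_space_cast or allocator-tagged STX
        // observation on its slot, the slot drops here instead of
        // being mis-rendered as an arena pointer.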
        for ((source, field_off), accesses) in &self.patterns {
            // A field that was loaded but never dereferenced gives
            // no signal. Drop it -- the renderer's existing u64
            // path is the correct fallback.
            if accesses.is_empty() {
                continue;
            }
            // Conflict with kptr path on the same slot: drop both
            // observations (the kptr loop below also skips this key).
            if conflicting.contains(&(*source, *field_off)) {
                continue;
            }
            let key = (*source, *field_off);
            if out.contains_key(&key) {
                continue;
            }
            let has_direct_evidence =
                self.arena_confirmed.contains(&key) || self.arena_stx_findings.contains_key(&key);
            if !has_direct_evidence {
                tracing::debug!(
                    parent_type_id = source,
                    field_offset = field_off,
                    accesses = accesses.len(),
                    "cast_analysis: shape-inference candidate without direct evidence; dropped (F1 mitigation)"
                );
                continue;
            }
            // Intersection of candidate type ids across every
            // observed (offset, size). The first lookup seeds
            // `candidates` by cloning once; subsequent lookups
            // retain only elements present in the next set.
            let mut iter = accesses.iter();
            let first = iter.next().expect("non-empty checked above");
            let empty = HashSet::new();
            let mut candidates: HashSet<u32> = layout
                .get(&(first.offset, first.size))
                .cloned()
                .unwrap_or_default();
            for acc in iter {
                let next = layout.get(&(acc.offset, acc.size)).unwrap_or(&empty);
                candidates.retain(|c| next.contains(c));
                if candidates.is_empty() {
                    break;
                }
            }
            candidates.remove(source);

            if candidates.len() == 1 {
                let target = candidates.into_iter().next().unwrap();
                // Shape-inference hits inherit any captured size
                // from the same slot's STX-flow observation — the
                // shape inference resolved a concrete BTF id, but
                // the renderer's chase still benefits from knowing
                // the producing allocator's size for bridge-fallback
                // diagnostics and future selection logic. Absent
                // entry → `None`.
                let alloc_size = self
                    .arena_alloc_size_index
                    .get(&(*source, *field_off))
                    .copied()
                    .flatten();
                tracing::debug!(
                    parent = source,
                    offset = field_off,
                    target,
                    accesses = accesses.len(),
                    alloc_size = ?alloc_size,
                    "cast_analysis: shape-inference hit emitted"
                );
                out.insert(
                    (*source, *field_off),
                    CastHit {
                        target_type_id: target,
                        addr_space: AddrSpace::Arena,
                        alloc_size,
                    },
                );
            }
            // 0 or 2+ candidates -> drop silently. False negative
            // is the safe direction.
        }

        // F4 mitigation: surface allocator call sites that the
        // analyzer saw but could not follow into a typed-slot
        // STX. These manifest when a scheduler does not mark its
        // allocator helpers `__always_inline` — the analyzer sees
        // the helper-call site (one or more allocator seeds applied)
        // but cannot follow the tagged R0 across the call boundary
        // into the caller's frame, so no slot ends up in
        // [`Self::arena_stx_findings`]. Emit one warning per dump
        // pass to keep noise bounded.
        //
        // Gate:
        //   - At least one allocator seed was applied (counted by
        //     [`Self::alloc_seeds_applied`]). Without this, no
        //     allocator was ever called and the warn would be
        //     spurious noise.
        //   - `arena_stx_findings` is empty. A non-empty findings
        //     map means at least one slot DID get tagged; that is
        //     the normal allocator-return seed path working as
        //     intended, the very shape the prior gate incorrectly
        //     flagged. The gate now fires only on the specific
        //     `__always_inline` failure mode.
        //
        // The prior gate (`!arena_stx_findings.is_empty() &&
        // arena_confirmed.is_empty()`) fired on the normal
        // allocator-return seed path's happy shape where a
        // scheduler correctly inlines the allocator AND the
        // consumer reads through the slot via STX-flow alone (no
        // `bpf_addr_space_cast` site). The operator received a
        // misleading "may need __always_inline" warning on a
        // working pipeline.
        if self.alloc_seeds_applied > 0 && self.arena_stx_findings.is_empty() {
            tracing::warn!(
                alloc_seeds_applied = self.alloc_seeds_applied,
                "cast_analysis: allocator seeds applied but no slot got an arena \
                 STX tag; allocator helpers may need __always_inline so the \
                 returned R0 reaches a typed-slot STX without crossing a \
                 BPF-to-BPF call boundary"
            );
        }

        // Kernel kptr path: directly observed STX of a typed
        // pointer into a u64 slot. The target type is known
        // exactly from the value register's RegState — no shape
        // inference needed. Conflicting writes to the same slot
        // (different target types) drop. Slots that ALSO appear
        // in any arena path (`conflicting` above) drop on
        // both sides — the analyzer cannot tell which observation
        // is real, and emitting either tag risks a false positive.
        for (key, entry) in self.kptr_findings {
            let KptrEntry::Single(target) = entry else {
                continue;
            };
            if conflicting.contains(&key) {
                continue;
            }
            if arena_kptr_merged.contains_key(&key) {
                continue;
            }
            // Kernel kptr findings carry no allocator-supplied size:
            // the value is a typed kernel pointer, not an arena
            // allocator return, so the size-match BTF resolution
            // path does not apply. `None` keeps the chase using the
            // analyzer's resolved `target_type_id`.
            out.insert(
                key,
                CastHit {
                    target_type_id: target,
                    addr_space: AddrSpace::Kernel,
                    alloc_size: None,
                },
            );
        }

        let arena_count = out
            .values()
            .filter(|h| h.addr_space == AddrSpace::Arena)
            .count();
        let kernel_count = out
            .values()
            .filter(|h| h.addr_space == AddrSpace::Kernel)
            .count();
        tracing::debug!(
            total = out.len(),
            arena = arena_count,
            kernel = kernel_count,
            "cast_analysis: finalize summary"
        );
        out
    }
}

/// Pre-scan the program for jump-target PCs.
///
/// Targets are computed as `pc + 1 + insn.off` for every BPF_JMP /
/// BPF_JMP32 instruction except `EXIT` and `CALL`. Out-of-range
/// targets (negative resolved address, or past `insns.len()`) are
/// dropped.
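///
/// Worked example: a conditional jump at pc 10 with `off == 5`
/// targets `10 + 1 + 5 == 16`; a `gotol` (JMP32 | JA) at pc 3 with
/// `imm == -2` targets `3 + 1 - 2 == 2`. Both land in the returned
/// set as long as they fall inside `insns`.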
fn jump_targets(insns: &[BpfInsn]) -> BTreeSet<usize> {
    let mut targets = BTreeSet::new();
    let mut skip_next = false;
    for (pc, insn) in insns.iter().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }
        let class = insn.code & 0x07;
        if class == BPF_CLASS_LD && insn.code == (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM) {
            // BPF_LD_IMM64 takes two slots; the second slot's `code`
            // is 0. BPF_JA's op nibble is 0x00 (under BPF_CLASS_JMP =
            // 0x05, the full JA opcode is 0x05); a bare 0 code byte
            // has class 0 (BPF_LD), not 0x05, so it would not match
            // the JMP-class gate below — but skipping the second
            // slot explicitly maintains symmetry with the main pass.
            skip_next = true;
            continue;
        }
        if class != BPF_CLASS_JMP && class != BPF_CLASS_JMP32 {
            continue;
        }
        let op = insn.code & 0xf0;
        if op == BPF_OP_EXIT || op == BPF_OP_CALL {
            continue;
        }
        // JMP32 | JA ("gotol") uses insn.imm for the 32-bit jump
        // offset, not insn.off (which is 16-bit). All other JMP/JMP32
        // instructions use insn.off. See kernel filter.h BPF_JMP32_A.
        let jump_off = if class == BPF_CLASS_JMP32 && op == 0x00 {
            // BPF_JA = 0x00 under JMP32 class = gotol
            insn.imm as i64
        } else {
            insn.off as i64
        };
        let next = pc as i64 + 1 + jump_off;
        if next >= 0 && (next as usize) < insns.len() {
            targets.insert(next as usize);
        }
    }
    targets
}

/// Build a `(offset, size_bytes) -> {type_ids}` index over every
/// BTF struct / union with a non-bitfield member at that location
/// whose member type has the given size. The matching phase
/// intersects sets across observed accesses to collapse to a single
/// candidate when one exists.
///
/// Recurses into anonymous nested struct / union members so that
/// the outer struct's index entries cover offsets that physically
/// live inside an inner anonymous aggregate. C lets a struct embed
/// an unnamed inner struct/union directly — every cache-line-padded
/// `struct { ... } __aligned(64);` member in the BPF schedulers is
/// such a case (`struct scx_cgroup_ctx` lays out two anonymous
/// inner structs at offsets 0 and 64). Named nested structs are
/// distinct types with their own entries; recursion is gated on
/// `btf.resolve_name(...) == ""` to avoid double-indexing them
/// under every parent that happens to embed one.
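///
/// Worked example (hypothetical ids): a struct `foo` with BTF id 42
/// declared as `struct foo { u64 a; u32 b; }` contributes
/// `(0, 8) -> {42, ...}` and `(8, 4) -> {42, ...}`. An access
/// pattern that touched both `(0, 8)` and `(8, 4)` intersects the
/// two sets and collapses to 42 only if no other indexed aggregate
/// shares both entries.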
fn build_layout_index(btf: &Btf, max_id: u32) -> HashMap<(u32, u32), HashSet<u32>> {
    let mut out: HashMap<(u32, u32), HashSet<u32>> = HashMap::new();
    let mut size_cache: HashMap<u32, Option<u32>> = HashMap::new();
    let mut consecutive_fail: u32 = 0;
    const CONSECUTIVE_FAIL_CAP: u32 = 256;

    let mut tid: u32 = 1;
    while tid <= max_id {
        match btf.resolve_type_by_id(tid) {
            Ok(Type::Struct(s)) | Ok(Type::Union(s)) => {
                consecutive_fail = 0;
                index_aggregate_members(btf, tid, &s.members, 0, &mut out, &mut size_cache, 0);
            }
            Ok(_) => {
                consecutive_fail = 0;
            }
            Err(_) => {
                consecutive_fail += 1;
                if consecutive_fail >= CONSECUTIVE_FAIL_CAP {
                    break;
                }
            }
        }
        tid += 1;
    }
    out
}

/// Maximum recursion depth for [`index_aggregate_members`].
///
/// Pathological BTF (cyclic typedef chains feeding back into a
/// struct member) could otherwise cause unbounded recursion.
/// Real C aggregates rarely nest more than a handful of levels.
const LAYOUT_INDEX_MAX_DEPTH: u32 = 8;

/// Walk `members` recording `(offset, size) -> parent_tid` entries
/// in `out`, and recurse into anonymous struct/union members so
/// their inner fields surface under `parent_tid` as well as under
/// the inner aggregate's own type id.
///
/// `base_offset` is added to each member's byte offset so that
/// fields inside a nested anonymous aggregate land at the correct
/// physical offset relative to the outer struct.
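///
/// Worked example (sketch): for
/// `struct outer { struct { u64 x; } __aligned(64); u64 y; }`
/// (anonymous inner struct at offset 0), `x` is recorded under the
/// outer struct's id at `(0, 8)` in addition to the anonymous inner
/// aggregate's own id, and `y` typically lands at `(64, 8)` once the
/// inner aggregate is padded out to its 64-byte alignment.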
fn index_aggregate_members(
    btf: &Btf,
    parent_tid: u32,
    members: &[btf_rs::Member],
    base_offset: u32,
    out: &mut HashMap<(u32, u32), HashSet<u32>>,
    size_cache: &mut HashMap<u32, Option<u32>>,
    depth: u32,
) {
    if depth >= LAYOUT_INDEX_MAX_DEPTH {
        return;
    }
    for m in members {
        let bit_off = m.bit_offset();
        if bit_off % 8 != 0 {
            continue;
        }
        if matches!(m.bitfield_size(), Some(s) if s > 0) {
            continue;
        }
        let off = base_offset + bit_off / 8;
        if let Some(size) = cached_member_size(btf, m, size_cache) {
            out.entry((off, size)).or_default().insert(parent_tid);
        }

        // Recurse into anonymous nested structs/unions so the outer
        // parent_tid covers offsets that physically reside inside
        // the inner aggregate. peel_modifiers strips Const /
        // Volatile / Typedef / DeclTag / TypeTag / Restrict so
        // `__aligned(64) struct { ... }` (which clang emits as a
        // typedef-or-modifier-wrapped anonymous struct) still
        // surfaces as Type::Struct here.
        let Ok(member_tid) = m.get_type_id() else {
            continue;
        };
        let Some(peeled) = super::btf_render::peel_modifiers(btf, member_tid) else {
            continue;
        };
        let inner = match peeled {
            Type::Struct(s) | Type::Union(s) => s,
            _ => continue,
        };
        let Ok(name) = btf.resolve_name(&inner) else {
            continue;
        };
        if !name.is_empty() {
            continue;
        }
        index_aggregate_members(
            btf,
            parent_tid,
            &inner.members,
            off,
            out,
            size_cache,
            depth + 1,
        );
    }
}

fn cached_member_size(
    btf: &Btf,
    m: &btf_rs::Member,
    cache: &mut HashMap<u32, Option<u32>>,
) -> Option<u32> {
    let tid = m.get_type_id().ok()?;
    *cache
        .entry(tid)
        .or_insert_with(|| member_size_bytes(btf, m))
}

/// Resolve a `btf_rs::Member` to a byte size, peeling Const / Volatile /
/// Restrict / Typedef / TypeTag / DeclTag chains via the renderer's
/// shared [`super::btf_render::peel_modifiers`] and sizing through
/// [`super::btf_render::type_size`]. Returns `None` for shapes the
/// renderer's sizing routine cannot resolve (Func, FuncProto, Var,
/// Datasec, Fwd, Void). For non-byte-multiple ints the BTF-declared
/// size is returned verbatim — a `__int128` member surfaces as
/// `Some(16)` and the matcher simply finds no LDX access of that
/// width to intersect against.
fn member_size_bytes(btf: &Btf, m: &btf_rs::Member) -> Option<u32> {
    let tid = m.get_type_id().ok()?;
    let terminal = super::btf_render::peel_modifiers(btf, tid)?;
    super::btf_render::type_size(btf, &terminal).map(|s| s as u32)
}

/// Resolved member of a parent BTF aggregate at a specific byte
/// offset.
///
/// Models the "what type lives here" answer for both
/// [`Type::Struct`] / [`Type::Union`] (regular C aggregates) and
/// [`Type::Datasec`] (`.bss` / `.data` / `.rodata` global sections,
/// which libbpf encodes as a flat sequence of `VarSecinfo` ->
/// `BTF_KIND_VAR` entries rather than as struct members). The
/// caller looks up `member_type_id`, peels modifiers, and decides
/// whether the location is a u64-typed kptr slot.
///
/// `byte_offset` is the offset returned by the parent's layout —
/// equal to the queried offset for an exact-offset hit on a struct
/// member, or the start-of-variable offset for a Datasec hit when
/// the queried offset lies inside a multi-byte variable's range.
/// The current callers (`handle_ldx` u64 detection and `handle_stx`
/// kptr-finding) require the queried offset to land exactly on a
/// member boundary; the start-of-variable Datasec semantics
/// preserve that invariant for plain u64 globals (the variable
/// starts AT the queried offset) while letting struct globals
/// surface as a Struct-typed member that the LDX path threads
/// through `peel_modifiers` for further analysis.
#[derive(Debug, Clone)]
enum MemberAt {
    /// Hit on a `BTF_KIND_STRUCT` / `BTF_KIND_UNION` member at the
    /// queried byte offset. `member_type_id` is the BTF id of the
    /// member's declared type. `resolved_parent_type_id` is the
    /// BTF id of the struct that directly contains this member —
    /// for nested structs this is the INNERMOST struct, not the
    /// outermost base register's struct. The CastMap keys on this
    /// id so the renderer's per-struct cast_lookup matches.
    Struct {
        member_type_id: u32,
        resolved_parent_type_id: u32,
        resolved_member_offset: u32,
    },
    /// Hit on a `BTF_KIND_DATASEC` `VarSecinfo` whose byte range
    /// contains the queried offset. `var_underlying_type_id` is
    /// the BTF id of the `BTF_KIND_VAR`'s underlying type (the
    /// global variable's actual C type — typically a u64, struct,
    /// or array). `var_byte_offset` is the variable's start
    /// offset within the section. For an exact-offset hit on a
    /// plain u64 global, `var_byte_offset == queried_offset`.
    /// For a struct-typed global, `var_byte_offset <=
    /// queried_offset < var_byte_offset + var_size`.
    Datasec {
        var_underlying_type_id: u32,
        var_byte_offset: u32,
    },
}

impl MemberAt {
    /// BTF id of the member's declared type. The caller peels
    /// modifiers and decides whether the location is a u64-typed
    /// kptr slot.
    fn member_type_id(&self) -> u32 {
        match self {
            Self::Struct { member_type_id, .. } => *member_type_id,
            Self::Datasec {
                var_underlying_type_id,
                ..
            } => *var_underlying_type_id,
        }
    }
}

/// Find the member at `byte_offset` within the parent BTF aggregate
/// identified by `parent_type_id`. Returns `None` for parents the
/// analyzer does not handle (everything other than Struct, Union,
/// or Datasec) and for offsets that do not land on a recognizable
/// member.
///
/// Struct / Union path: matches members at exactly `byte_offset`,
/// skipping bitfields and members at non-byte-aligned bit offsets
/// (the analyzer cannot reason about either as 64-bit pointer
/// slots).
///
/// Datasec path: matches the `VarSecinfo` whose `[offset,
/// offset+size)` range contains `byte_offset`. Datasec entries are
/// laid out flat (no bitfields, no nested layout); each entry's
/// `get_type_id()` resolves to a `BTF_KIND_VAR` whose
/// `get_type_id()` returns the global's underlying C type. The
/// returned [`MemberAt::Datasec`] surfaces the underlying type id
/// so the LDX / STX paths can peel modifiers and check for a
/// plain u64 just like they do for struct members.
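///
/// Worked example (hypothetical layout): querying byte offset 72 in
/// a struct whose member at offset 64 is a 32-byte named nested
/// struct recurses into that struct with a relative offset of 8 and
/// returns the inner member, with `resolved_parent_type_id` set to
/// the inner struct's id per the [`MemberAt::Struct`] contract.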
fn struct_member_at(btf: &Btf, parent_type_id: u32, byte_offset: u32) -> Option<MemberAt> {
    let (t, parent_type_id) = super::btf_render::peel_modifiers_with_id(btf, parent_type_id)?;
    match t {
        Type::Struct(s) | Type::Union(s) => {
            for m in &s.members {
                if matches!(m.bitfield_size(), Some(s) if s > 0) {
                    continue;
                }
                let bit_off = m.bit_offset();
                if bit_off % 8 != 0 {
                    continue;
                }
                let member_off = bit_off / 8;
                let member_type_id = m.get_type_id().ok()?;
                if member_off == byte_offset {
                    if let Some(terminal) = super::btf_render::peel_modifiers(btf, member_type_id)
                        && matches!(terminal, Type::Struct(_) | Type::Union(_))
                    {
                        return struct_member_at(btf, member_type_id, 0);
                    }
                    return Some(MemberAt::Struct {
                        member_type_id,
                        resolved_parent_type_id: parent_type_id,
                        resolved_member_offset: byte_offset,
                    });
                }
                if member_off < byte_offset
                    && let Some(terminal) = super::btf_render::peel_modifiers(btf, member_type_id)
                {
                    match &terminal {
                        Type::Array(arr) => {
                            let elem_tid = arr.get_type_id().ok()?;
                            let elem_terminal =
                                super::btf_render::peel_modifiers(btf, elem_tid)?;
                            let elem_size =
                                super::btf_render::type_size(btf, &elem_terminal)? as u32;
                            if elem_size > 0 {
                                let arr_len = arr.len() as u32;
                                let arr_byte_size = elem_size * arr_len;
                                let rel = byte_offset - member_off;
                                if rel < arr_byte_size && rel.is_multiple_of(elem_size) {
                                    return Some(MemberAt::Struct {
                                        member_type_id: elem_tid,
                                        resolved_parent_type_id: parent_type_id,
                                        resolved_member_offset: byte_offset,
                                    });
                                }
                            }
                        }
                        Type::Struct(_) | Type::Union(_) => {
                            let member_size = super::btf_render::type_size(btf, &terminal)? as u32;
                            let rel = byte_offset - member_off;
                            if rel < member_size {
                                return struct_member_at(btf, member_type_id, rel);
                            }
                        }
                        _ => {}
                    }
                }
            }
            None
        }
        Type::Datasec(ds) => {
            for var_info in &ds.variables {
                let off = var_info.offset();
                let size = var_info.size() as u32;
                let end = off.checked_add(size)?;
                if byte_offset < off || byte_offset >= end {
                    continue;
                }
                // Resolve the chained Var so we can pull the
                // underlying type id. A non-Var here indicates
                // malformed BTF (libbpf always emits Var per
                // VarSecinfo); drop silently — false negative is
                // the safe direction. The check on
                // `Type::Var(...)` matches the renderer's
                // `render_datasec` shape so any future Datasec
                // variant added to btf-rs surfaces consistently
                // across both modules.
                let chained = btf.resolve_chained_type(var_info).ok()?;
                let var = match chained {
                    Type::Var(v) => v,
                    _ => return None,
                };
                let var_underlying_type_id = var.get_type_id().ok()?;
                let rel = byte_offset - off;
                if let Some(terminal) =
                    super::btf_render::peel_modifiers(btf, var_underlying_type_id)
                    && matches!(terminal, Type::Struct(_) | Type::Union(_))
                    && let Some(inner) = struct_member_at(btf, var_underlying_type_id, rel)
                {
                    return Some(inner);
                }
                return Some(MemberAt::Datasec {
                    var_underlying_type_id,
                    var_byte_offset: off,
                });
            }
            None
        }
        _ => None,
    }
}

/// Resolve a BTF type id and report whether it peels to
/// `Ptr -> Void`.
///
/// `Ptr` ids whose pointee is `0` (the BTF void marker — same
/// convention as [`FuncProto::return_type_id`] uses) match. The
/// peel walks `Const` / `Volatile` / `Restrict` / `Typedef` /
/// `TypeTag` / `DeclTag` modifiers only — it never peels through a
/// `Ptr` itself, which we would not want: the result of
/// dereferencing an arbitrary modifier-wrapped type is not a
/// useful "Ptr -> Void" signal for arena-allocator detection.
///
/// Used to gate [`Analyzer::handle_kfunc_call`]'s arena-allocator
/// arm: the allowlisted kfunc names ([`ARENA_ALLOC_KFUNC_NAMES`])
/// only confer a [`RegState::ArenaU64FromAlloc`] tag when the
/// declared return is structurally `void *`. A kfunc whose name
/// drifts onto the allowlist but whose BTF return is not
/// `Ptr -> Void` cannot be misclassified here.
///
/// Returns `false` for any type id that does not resolve, peels
/// to a non-`Ptr` terminal, or whose pointee resolves to a
/// non-void type. Failure is the safe direction — false
/// negatives drop to the existing typed-pointer arm or no-op.
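///
/// Worked example: a `void *` return lowers to `Ptr` with pointee
/// id `0` and matches; `const void *` lowers to `Const -> Ptr(0)`,
/// the peel strips the `Const`, and it still matches;
/// `struct task_struct *` lowers to `Ptr -> Struct` and is rejected.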
fn return_peels_to_ptr_void(btf: &Btf, ret_id: u32) -> bool {
    // Peel modifiers AROUND the Ptr first — `const void *` and
    // its kin lower to `Const(Ptr)` in BTF. The renderer's
    // [`super::btf_render::peel_modifiers`] handles the same
    // shape; reusing it keeps the semantics aligned with the
    // rest of the analyzer.
    //
    // The peel returns `None` for any type id that does not
    // resolve OR terminates on a non-trivial shape we cannot
    // interpret as Ptr->Void (Func, FuncProto, Var, Datasec).
    // Drop conservatively in those cases — false negatives are
    // the safe direction.
    let Some(peeled) = super::btf_render::peel_modifiers(btf, ret_id) else {
        return false;
    };
    let Type::Ptr(p) = peeled else {
        return false;
    };
    // BTF encodes `void *` with the Ptr's pointee type id == 0.
    // Same convention [`FuncProto::return_type_id`] uses for void
    // returns at the FuncProto level. Anything else is a typed
    // pointer that arm 1 (`resolve_to_struct_id`) already handled
    // — falling through here would let arm 2 mistakenly tag a
    // typed-pointer return as ArenaU64FromAlloc, the very case
    // the strict gate prevents.
    p.get_type_id().map(|id| id == 0).unwrap_or(false)
}

/// Resolve a `bpf_map_lookup_elem` call's R0 value type from the
/// caller-side map descriptor metadata in the program BTF.
///
/// The plain-helper arm of [`Analyzer::step`] looks up R1 in the
/// `.maps` `BTF_KIND_DATASEC` and types R0 as a typed pointer to
/// the map's value struct. This function performs the BTF walk:
///
/// 1. `datasec_id` must resolve to [`Type::Datasec`] whose name is
///    exactly `.maps` — the libbpf-managed user-space BTF map
///    declaration section. A `BTF_KIND_DATASEC` named anything
///    else (e.g. `.bss`, `.data`, `.data.<name>`) is rejected so
///    a non-map struct that happens to carry a `value` member of
///    pointer type cannot drive this arm.
/// 2. The datasec's `VarSecinfo` whose `offset == var_offset` is
///    located. The chained type must be [`Type::Var`] whose
///    underlying type peels through modifiers to a
///    [`Type::Struct`] / [`Type::Union`] — the per-map struct
///    declaration libbpf parses in `parse_btf_map_def`
///    (`tools/lib/bpf/libbpf.c`).
/// 3. The struct's members are scanned for one named `value`
///    (the `__type(value, T)` declaration expanded to
///    `typeof(T) *value` per `tools/lib/bpf/bpf_helpers.h`).
/// 4. The `value` member's type peels to [`Type::Ptr`] — libbpf
///    rejects non-`Ptr` value declarations (`if (!btf_is_ptr(t))`
///    in `parse_btf_map_def`).
/// 5. The `Ptr`'s pointee resolves through
///    [`super::bpf_map::resolve_to_struct_id`] to a
///    [`Type::Struct`] / [`Type::Union`] id. Maps whose value type
///    is a primitive (e.g. `__type(value, u64)` for stat counters)
///    or `void` peel to non-struct terminals; the function returns
///    `None` and the analyzer leaves R0 Unknown.
///
/// Any failure on the walk drops the whole resolution — false
/// negatives are the safe direction. The walk does NOT mutate any
/// analyzer state and does NOT consult `arena_confirmed` /
/// `arena_stx_findings`; the seeded `Pointer{T}` flows into the
/// existing kptr/arena STX paths exactly the same way a kfunc-
/// returned typed pointer does (see [`Analyzer::handle_kfunc_call`]
/// arm 1).
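///
/// Worked example (sketch; `task_ctx` / `task_ctxs` are hypothetical
/// names): for a declaration such as
/// `struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, u32);
/// __type(value, struct task_ctx); } task_ctxs SEC(".maps");`, the
/// walk finds the `.maps` Datasec, the `task_ctxs` Var at
/// `var_offset`, the map-def struct, its `value` member of type
/// `struct task_ctx *`, and returns the BTF id of `struct task_ctx`.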
fn map_value_struct_id(btf: &Btf, datasec_id: u32, var_offset: u32) -> Option<u32> {
    // Gate 1: datasec must be `.maps`. Resolve the type, confirm
    // the kind, then resolve the name. Any non-Datasec kind or a
    // name resolution error drops to None — the analyzer's safe
    // direction. A renamed `.maps.foo` section (libbpf does NOT
    // rename `.maps`, but the kernel allows custom section names
    // for non-libbpf-managed BPF objects) would not match here;
    // any future need to broaden this gate must add a corresponding
    // test that proves the broader gate cannot drive a false
    // positive on a non-map datasec.
    let ty = btf.resolve_type_by_id(datasec_id).ok()?;
    let datasec = match ty {
        Type::Datasec(d) => d,
        _ => return None,
    };
    let name = btf.resolve_name(&datasec).ok()?;
    if name != ".maps" {
        return None;
    }

    // Gate 2: locate the per-map VarSecinfo. The verifier guarantees
    // VarSecinfos are non-overlapping per the BTF spec; an exact
    // offset match is the only correct lookup (a partial overlap
    // means the caller's annotation is targeting a struct member,
    // not a map descriptor — the analyzer's R1 must point at the
    // map's struct, never mid-struct, because clang's relocation
    // emission uses the var's start offset).
    let var_info = datasec
        .variables
        .iter()
        .find(|v| v.offset() == var_offset)?;
    let chained = btf.resolve_chained_type(var_info).ok()?;
    let var = match chained {
        Type::Var(v) => v,
        _ => return None,
    };
    let var_type_id = var.get_type_id().ok()?;

    // Gate 3: the var's underlying type must peel to a
    // Struct/Union — that is the map descriptor C struct emitted
    // by clang for `struct { __uint(...); __type(...); ... } name
    // SEC(".maps");`. Modifiers around the struct (Const /
    // Volatile / Typedef / TypeTag / DeclTag / Restrict) are peeled
    // by [`super::btf_render::peel_modifiers`] consistently with
    // the rest of the analyzer.
    let map_def_terminal = super::btf_render::peel_modifiers(btf, var_type_id)?;
    let map_def = match map_def_terminal {
        Type::Struct(s) => s,
        _ => return None,
    };

    // Gate 4: find the `value` member. clang's `__type(value, T)`
    // macro in `tools/lib/bpf/bpf_helpers.h` (`#define __type(name,
    // val) typeof(val) *name`) emits a struct member literally
    // named `value` whose type is `typeof(T) *`. libbpf
    // (`parse_btf_map_def`) keys on `strcmp(name, "value") == 0`
    // for this exact match — the analyzer mirrors the literal name
    // check.
    //
    // A member whose name resolution fails individually does not
    // abort the search: real map decls always have name-resolved
    // members (`type`, `key`, `value`, `max_entries`, …), but a
    // malformed BTF carrying an unnamed member should not poison
    // the lookup. `continue` past the bad name and inspect the
    // next member.
    for member in &map_def.members {
        let Ok(mname) = btf.resolve_name(member) else {
            continue;
        };
        if mname != "value" {
            continue;
        }
        // Gate 5: member type peels to `Ptr -> Struct/Union`.
        // libbpf's `parse_btf_map_def` rejects a non-Ptr `value`
        // member with `-EINVAL`; the analyzer's gate enforces the
        // same shape. A `Ptr -> u64` (stat-counter map) or
        // `Ptr -> Void` peels to a non-struct pointee and
        // `resolve_to_struct_id` returns None, dropping the
        // resolution — the renderer's existing u64 plain-counter
        // path is the correct fallback for stat maps.
        let mtype_id = member.get_type_id().ok()?;
        let mterminal = super::btf_render::peel_modifiers(btf, mtype_id)?;
        let ptr = match mterminal {
            Type::Ptr(p) => p,
            _ => return None,
        };
        let pointee = ptr.get_type_id().ok()?;
        return super::bpf_map::resolve_to_struct_id(btf, pointee);
    }
    // No `value` member: map shapes that omit a value declaration
    // (`BPF_MAP_TYPE_PROG_ARRAY` declared with `__array(values, ...)`
    // for instance) cannot have their R0 typed by this arm. Drop
    // silently.
    None
}

/// Allowlist of kfunc names whose `Ptr -> Void` return is treated
/// as an arena VA seed for [`RegState::ArenaU64FromAlloc`].
///
/// Each entry must be a real kfunc declared in the kernel's
/// `kernel/bpf/arena.c` (or peer kernel arena helpers) AND must
/// return `void *` whose runtime value is a 4 GiB-window arena
/// virtual address. Verified against the kernel source:
/// `bpf_arena_alloc_pages` is declared `__bpf_kfunc void *` per
/// linux `kernel/bpf/arena.c::bpf_arena_alloc_pages`.
///
/// Order is alphabetical for readability — the allowlist is a
/// linear-scan small-N membership test in
/// [`Analyzer::handle_kfunc_call`]. A future arena-returning
/// kfunc is added by appending its name here AND verifying its
/// return type peels to `Ptr -> Void` in the kernel BTF the
/// analyzer consumes; the strict
/// [`return_peels_to_ptr_void`] gate keeps a name-allowlist drift
/// from producing a false positive on a same-named non-arena
/// kfunc.
///
/// Distinct from the `ALLOC_SUBPROG_NAMES` allowlist in
/// [`crate::vmm::cast_analysis_load`]: that list is for in-tree
/// library subprograms (BPF-to-BPF calls with `BPF_PSEUDO_CALL`
/// + symbol resolution against the program ELF); this list is
/// for kernel kfuncs (`BPF_PSEUDO_KFUNC_CALL` + BTF id resolution
/// in [`Analyzer::handle_kfunc_call`]). The kernel kfunc and
/// in-tree subprog code paths are independent — a single name
/// belongs to exactly one of the two allowlists.
pub(crate) const ARENA_ALLOC_KFUNC_NAMES: &[&str] = &[
    // Generic BPF arena page allocator. Returns `void *` per
    // `kernel/bpf/arena.c::bpf_arena_alloc_pages` (`__bpf_kfunc
    // void *bpf_arena_alloc_pages(...)`). The runtime value is
    // either NULL or a user-side arena VA suitable for the
    // STX-flow tagging path.
    "bpf_arena_alloc_pages",
];

/// Convert the `BPF_DW`/`BPF_W`/`BPF_H`/`BPF_B` size bits to a byte
/// count. `None` for unknown encodings.
fn ldx_size_bytes(size_bits: u8) -> Option<u32> {
    match size_bits {
        BPF_SIZE_DW => Some(8),
        BPF_SIZE_W => Some(4),
        BPF_SIZE_H => Some(2),
        BPF_SIZE_B => Some(1),
        _ => None,
    }
}

/// `BpfInsn::off` is `i16`, so a negative value means the load
/// is relative to the base register at a backward offset (e.g.
/// stack-relative loads through r10). The cast pattern only
/// considers non-negative offsets — kernel struct fields never
/// have a negative byte offset relative to the struct base.
fn field_byte_offset(off: i32) -> Option<u32> {
    if off < 0 { None } else { Some(off as u32) }
}

// --- BPF instruction encoding constants --------------------------
//
// Sourced from `libbpf_rs::libbpf_sys` (which re-exports the bindgen
// translation of `linux/include/uapi/linux/bpf.h`). The analyzer
// stores opcodes in `u8` fields per the wire format, so the
// upstream `u32` constants are narrowed to typed locals below.
// Constants not exported by libbpf-sys (the standalone top-nibble
// values for `BPF_XCHG` / `BPF_CMPXCHG`) are derived from the full
// opcodes.

// BPF_CLASS_* — low 3 bits of `code` selecting the instruction
// class. Names retain the `BPF_CLASS_` prefix for parity with the
// kernel's `#define BPF_CLASS(code) ((code) & 0x07)` macro.
const BPF_CLASS_LD: u8 = bs::BPF_LD as u8;
const BPF_CLASS_LDX: u8 = bs::BPF_LDX as u8;
const BPF_CLASS_ST: u8 = bs::BPF_ST as u8;
const BPF_CLASS_STX: u8 = bs::BPF_STX as u8;
const BPF_CLASS_ALU: u8 = bs::BPF_ALU as u8;
const BPF_CLASS_JMP: u8 = bs::BPF_JMP as u8;
const BPF_CLASS_JMP32: u8 = bs::BPF_JMP32 as u8;
const BPF_CLASS_ALU64: u8 = bs::BPF_ALU64 as u8;

// BPF_SIZE_* — bits 3..4 of `code` selecting the access width.
const BPF_SIZE_W: u8 = bs::BPF_W as u8;
const BPF_SIZE_H: u8 = bs::BPF_H as u8;
const BPF_SIZE_B: u8 = bs::BPF_B as u8;
const BPF_SIZE_DW: u8 = bs::BPF_DW as u8;

// BPF_MODE_* — bits 5..7 of `code` selecting the addressing mode.
const BPF_MODE_IMM: u8 = bs::BPF_IMM as u8;
const BPF_MODE_MEM: u8 = bs::BPF_MEM as u8;
/// Atomic memory ops (BPF_STX class). `imm` selects the specific
/// operation (`BPF_XCHG`, `BPF_CMPXCHG`, `BPF_ADD | BPF_FETCH`, …).
/// See linux uapi `bpf.h`: `#define BPF_ATOMIC 0xc0`.
const BPF_MODE_ATOMIC: u8 = bs::BPF_ATOMIC as u8;

// BPF_OP_* — top 4 bits of `code` selecting the ALU / JMP op.
const BPF_OP_MOV: u8 = bs::BPF_MOV as u8;
const BPF_OP_CALL: u8 = bs::BPF_CALL as u8;
const BPF_OP_EXIT: u8 = bs::BPF_EXIT as u8;

/// Source-operand selector. `BPF_X` (== libbpf-sys `BPF_X` == 0x08)
/// signals a register source; `BPF_K` (== 0) signals an immediate.
const BPF_SRC_X: u8 = bs::BPF_X as u8;

/// Atomic-op `imm` field bit set on operations that return the prior
/// memory value. Combined with `BPF_CMPXCHG_TOP` to form `BPF_CMPXCHG`.
/// See linux uapi `bpf.h`: `#define BPF_FETCH 0x01`.
const BPF_FETCH: i32 = bs::BPF_FETCH as i32;

/// Top nibble of the atomic-op `imm` for atomic compare-and-write.
/// Combined with `BPF_FETCH` to form the full opcode. See linux uapi
/// `bpf.h`: `#define BPF_CMPXCHG (0xf0 | BPF_FETCH)`. libbpf-sys
/// exports `BPF_CMPXCHG` (the full 0xf1 opcode); the standalone top
/// nibble is derived by stripping the FETCH bit.
const BPF_CMPXCHG_TOP: i32 = (bs::BPF_CMPXCHG as i32) & !BPF_FETCH;

/// Atomic-op `imm` for `BPF_LOAD_ACQ`: `dst = smp_load_acquire(src
/// + off16)`. See linux include/linux/filter.h.
const BPF_LOAD_ACQ_IMM: i32 = bs::BPF_LOAD_ACQ as i32;
/// Atomic-op `imm` for `BPF_STORE_REL`: `smp_store_release(dst +
/// off16, src)`. See linux include/linux/filter.h.
const BPF_STORE_REL_IMM: i32 = bs::BPF_STORE_REL as i32;

/// Frame-pointer register index. See `BPF_REG_10 = 10` in linux
/// uapi `bpf.h`: r10 is the read-only frame pointer; STX/LDX through
/// it spill / reload the stack frame.
const BPF_REG_R10: usize = bs::BPF_REG_10 as usize;

/// `bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL` denotes that
/// `bpf_call->imm` is the BTF id of a `BTF_KIND_FUNC` in the running
/// kernel. Defined in linux uapi `bpf.h`.
pub(crate) const BPF_PSEUDO_KFUNC_CALL: u8 = bs::BPF_PSEUDO_KFUNC_CALL as u8;

/// Helper id for `bpf_map_lookup_elem` per linux uapi `bpf.h`
/// (`BPF_FUNC_map_lookup_elem = 1`, the second `bpf_func_id` enum
/// value after `BPF_FUNC_unspec = 0`). Sourced from `libbpf-sys`'s
/// bindgen translation of the same header.
///
/// Helper calls in pre-relocation `.bpf.o` bytecode carry
/// `src_reg == 0` (plain helper, distinct from `BPF_PSEUDO_CALL`
/// for BPF-to-BPF and `BPF_PSEUDO_KFUNC_CALL` for kfuncs) and
/// `imm` set to the helper id. The analyzer's [`BPF_OP_CALL`] arm
/// types R0 only for this single helper id — no other helper has a
/// pointer-to-struct return shape we can recover from the BPF
/// program BTF alone. The kernel's
/// `bpf_map_lookup_elem_proto::ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL`
/// (linux `kernel/bpf/helpers.c`) is the correctness anchor: the
/// returned pointer points at the map's value bytes whose BTF type
/// is the map descriptor's `__type(value, T)` declaration.
const BPF_FUNC_MAP_LOOKUP_ELEM: i32 = bs::BPF_FUNC_map_lookup_elem as i32;
const BPF_FUNC_MAP_UPDATE_ELEM: i32 = bs::BPF_FUNC_map_update_elem as i32;
const BPF_FUNC_MAP_LOOKUP_PERCPU_ELEM: i32 = bs::BPF_FUNC_map_lookup_percpu_elem as i32;
const BPF_OP_ADD: u8 = bs::BPF_ADD as u8;

/// `bpf_call->src_reg == BPF_PSEUDO_CALL` denotes a BPF-to-BPF call:
/// `bpf_call->imm` is a pc-relative offset to another BPF function
/// in the same program. Pre-relocation `.bpf.o` files (the production
/// path) emit kfunc call sites with `src_reg = BPF_PSEUDO_CALL` and
/// `imm = -1`; libbpf's RELO_EXTERN_CALL handler rewrites them to
/// `src_reg = BPF_PSEUDO_KFUNC_CALL` + `imm = kfunc_btf_id` at load
/// time. The host-side cast loader mirrors that rewrite via
/// [`crate::vmm::cast_analysis_load`] before invoking
/// [`analyze_casts`], so the analyzer never has to distinguish pre-
/// from post-relocation forms — by the time it runs every kfunc
/// call carries its BTF id.
pub(crate) const BPF_PSEUDO_CALL: u8 = bs::BPF_PSEUDO_CALL as u8;

#[cfg(test)]
mod tests;