relon-codegen-llvm 0.1.0-rc2

LLVM-backed AOT evaluator for Relon (Phase A bootstrap)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
//! `Op`-family: memory / buffer-protocol I/O + arena addressing.
//!
//! LoadField/StoreField (scalar buffer slots), the pointer-indirect
//! param loads, raw `Load*/Store*AtAbsolute`, memcpy, scratch alloc, and
//! the arena-relative address composition helpers. `LoadFieldAtAbsolute`
//! (the dynamic-base schema-field load) is lowered here too via
//! `lower_mem_rest` (Phase 0b).

use inkwell::values::{BasicValueEnum, IntValue, PointerValue};

use relon_ir::ir::{IrType, Op};

use crate::error::LlvmError;
use crate::state::{
    NativeTrap, ARENA_STATE_OFFSET_LEN, ARENA_STATE_OFFSET_SCRATCH_BASE,
    ARENA_STATE_OFFSET_SCRATCH_CURSOR, ARENA_STATE_OFFSET_TAIL_CURSOR,
};

use super::*;

/// Variants of the absolute-pointer load lowering paths.
///
/// A small `Copy` selector naming the kind of value a raw
/// `Load*AtAbsolute` lowering reads. The lowering that consumes it is
/// not part of this section, so the per-variant notes below are
/// inferred from the variant names only.
#[derive(Clone, Copy)]
pub(crate) enum AbsLoad {
    /// Presumably a 32-bit integer load — confirm against the consumer.
    I32,
    /// Presumably a 64-bit integer load — confirm against the consumer.
    I64,
    /// Presumably an 8-bit load whose result is treated as unsigned
    /// (zero-extended) — NOTE(review): confirm the `U` suffix meaning.
    I8U,
    /// Presumably a 64-bit float load — confirm against the consumer.
    F64,
}

/// Variants of the absolute-pointer store lowering paths.
///
/// A small `Copy` selector naming the kind of value a raw
/// `Store*AtAbsolute` lowering writes. The lowering that consumes it is
/// not part of this section, so the per-variant notes below are
/// inferred from the variant names only.
#[derive(Clone, Copy)]
pub(crate) enum AbsStore {
    /// Presumably a 32-bit integer store — confirm against the consumer.
    I32,
    /// Presumably a 64-bit integer store — confirm against the consumer.
    I64,
    /// Presumably an 8-bit store — confirm against the consumer.
    I8,
    /// Presumably a 64-bit float store — confirm against the consumer.
    F64,
}

impl<'ctx, 'b, 'cp> Emit<'ctx, 'b, 'cp> {
    /// Phase 0b dispatch seam for the memory-op family, reached from
    /// `super::lower_op`.
    ///
    /// The only op handled here is `Op::LoadFieldAtAbsolute { offset, ty }`
    /// (the dynamic-base schema-field load), which is forwarded to
    /// [`Self::emit_load_field_at_absolute`]. That op pops an i32
    /// arena-relative base address from the operand stack and pushes the
    /// field found at `offset`; it is the sibling of
    /// [`Self::emit_load_field`], which reads the implicit `in_ptr` slot
    /// instead.
    ///
    /// * `ip` — instruction index; only used in the rejection message.
    /// * `ip_hint` — passed through to the field-load lowering.
    /// * `op` — the IR op to lower.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` for every op other than
    /// `LoadFieldAtAbsolute`, and propagates any error from the field-load
    /// lowering itself.
    pub(crate) fn lower_mem_rest(
        &mut self,
        ip: usize,
        ip_hint: &str,
        op: &Op,
    ) -> Result<(), LlvmError> {
        if let Op::LoadFieldAtAbsolute { offset, ty } = op {
            return self.emit_load_field_at_absolute(ip_hint, *offset, *ty);
        }
        let message = format!("unsupported op (Phase 0b mem seam): {op:?} at ip={ip}");
        Err(LlvmError::Codegen(message))
    }

    /// Lower `Op::LoadFieldAtAbsolute { offset, ty }`.
    ///
    /// Stack effect: `[i32 addr] -> [T]`. The popped value is the
    /// arena-relative address of a schema instance's fixed area; the field
    /// address is composed by [`Self::compose_abs_addr`] from that base,
    /// `offset`, and the access width reported by
    /// [`Self::field_access_size`]. Per the original notes, that
    /// composition also performs the arena bounds check so a corrupt
    /// pointer is reported instead of forming an invalid host pointer.
    ///
    /// What gets pushed depends on `ty`:
    ///
    /// * pointer-indirect types (`String` and every `List*`) — the 4-byte
    ///   slot is loaded as an i32 and pushed unchanged, tagged with `ty`;
    /// * `F64` — loaded as a `double`, then bit-cast to i64 so the operand
    ///   stack stays integer-typed;
    /// * everything else — loaded with the type chosen by
    ///   [`Self::field_load_kind`]; `Bool` / `Unit` (i8 on the wire) are
    ///   zero-extended to i32.
    ///
    /// # Errors
    ///
    /// Propagates stack, address-composition and builder failures, and the
    /// rejection from `field_load_kind` for unsupported scalar types.
    pub(crate) fn emit_load_field_at_absolute(
        &mut self,
        ip_hint: &str,
        offset: u32,
        ty: IrType,
    ) -> Result<(), LlvmError> {
        let base_addr = self.pop_int(ip_hint)?;
        let width = self.field_access_size(ty)?;
        let field_ptr = self.compose_abs_addr(base_addr, offset, width, "LoadFieldAtAbsolute")?;
        match ty {
            // Pointer-indirect schema field: the slot holds a 4-byte offset
            // to the field's tail record. F1: the input marshaller baked
            // `in_ptr` into the slot recursively (`finish_arena_absolute`),
            // so the loaded value is already the record root pointer that
            // consumers (`ReadStringLen`, list index, the in-place return
            // sentinel) expect — no `+ in_ptr` rebase and no re-encode. F4
            // added `ListSchema` / `ListList` so a parameter-field list
            // return matches tree-walk; this mirrors the cranelift backend.
            IrType::String
            | IrType::ListInt
            | IrType::ListFloat
            | IrType::ListBool
            | IrType::ListString
            | IrType::ListSchema
            | IrType::ListList => {
                let label = self.next_name("loadfa_ptr_arena_rel");
                let slot = self
                    .builder
                    .build_load(self.ctx.i32_type(), field_ptr, &label)
                    .map_err(|e| {
                        LlvmError::Codegen(format!("LoadFieldAtAbsolute ptr load: {e}"))
                    })?;
                self.push(slot.into_int_value(), ty);
            }
            IrType::F64 => {
                let load_label = self.next_name("loadfa_f64");
                let as_double = self
                    .builder
                    .build_load(self.ctx.f64_type(), field_ptr, &load_label)
                    .map_err(|e| {
                        LlvmError::Codegen(format!("LoadFieldAtAbsolute f64 load: {e}"))
                    })?;
                let cast_label = self.next_name("loadfa_f64_bits");
                let as_bits = self
                    .builder
                    .build_bit_cast(as_double, self.ctx.i64_type(), &cast_label)
                    .map_err(|e| {
                        LlvmError::Codegen(format!("LoadFieldAtAbsolute f64 bitcast: {e}"))
                    })?
                    .into_int_value();
                self.push(as_bits, IrType::F64);
            }
            _ => {
                let (llvm_ty, push_ty) = self.field_load_kind(ty)?;
                let label = self.next_name("loadfa");
                let loaded = self
                    .builder
                    .build_load(llvm_ty, field_ptr, &label)
                    .map_err(|e| LlvmError::Codegen(format!("LoadFieldAtAbsolute load: {e}")))?
                    .into_int_value();
                // Bool / Unit travel as i8 in the buffer but as i32 on the
                // virtual stack.
                let value = if matches!(push_ty, IrType::Bool | IrType::Unit) {
                    let label = self.next_name("loadfa_zext");
                    self.builder
                        .build_int_z_extend(loaded, self.ctx.i32_type(), &label)
                        .map_err(|e| {
                            LlvmError::Codegen(format!("LoadFieldAtAbsolute zext: {e}"))
                        })?
                } else {
                    loaded
                };
                self.push(value, push_ty);
            }
        }
        Ok(())
    }

    /// Lower a `LoadField { offset, ty }` and push the loaded value.
    ///
    /// Two shapes exist:
    ///
    /// * **Phase D.1 fast path** (`self.fast_path` is set): the field load
    ///   becomes a direct read of the LLVM parameter whose position in
    ///   `profile.arg_offsets` matches `offset`. Only `I64` is accepted.
    /// * **Buffer protocol**: the value is loaded from
    ///   `arena_base + in_ptr + offset` (address built by
    ///   [`Self::compute_buffer_addr`]). `F64` is loaded as a `double` and
    ///   bit-cast to i64; `Bool` / `Unit` are zero-extended from i8 to i32.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` when the fast path sees a non-`I64`
    /// type, an offset that is not a declared argument offset, or a
    /// missing LLVM parameter; when no arena base is available; when `ty`
    /// is rejected by [`Self::field_load_kind`]; or when a builder call
    /// fails.
    pub(crate) fn emit_load_field(&mut self, offset: u32, ty: IrType) -> Result<(), LlvmError> {
        // Phase D.1 fast path: the argument is already an LLVM param, so no
        // memory access is emitted at all.
        if let Some(fast) = self.fast_path.as_ref() {
            if !matches!(ty, IrType::I64) {
                return Err(LlvmError::Codegen(format!(
                    "fast-path LoadField: only I64 args supported, got {ty:?}"
                )));
            }
            let declared = fast
                .profile
                .arg_offsets
                .iter()
                .position(|candidate| *candidate == offset);
            let slot = match declared {
                Some(index) => index,
                None => {
                    return Err(LlvmError::Codegen(format!(
                        "fast-path LoadField: offset {offset} not in profile.arg_offsets"
                    )));
                }
            };
            // Under the fast-entry signature LLVM param `slot` is the i64
            // arg itself (no implicit state slot, no handshake i32 quartet).
            let param = match self.func.get_nth_param(slot as u32) {
                Some(found) => found,
                None => {
                    return Err(LlvmError::Codegen(format!(
                        "fast-path LoadField: llvm param #{slot} missing on function"
                    )));
                }
            };
            self.push(param.into_int_value(), IrType::I64);
            return Ok(());
        }

        let arena_base = match self.arena_base_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(
                    "LoadField outside buffer-protocol entry shape".into(),
                ));
            }
        };
        let in_ptr = self.lookup_param(0)?; // IR LocalGet(0) == in_ptr
        let width = self.field_access_size(ty)?;
        let field_ptr = self.compute_buffer_addr(arena_base, in_ptr, offset, width)?;

        if ty == IrType::F64 {
            // AOT-1: an F64 field is 8 LE bytes. It is loaded as a `double`
            // and bit-cast to i64 so the operand stack stays integer-typed
            // (Option B); the scalar tail below calls `.into_int_value()`
            // and could not consume a `FloatValue`.
            let load_label = self.next_name("loadf_f64");
            let as_double = self
                .builder
                .build_load(self.ctx.f64_type(), field_ptr, &load_label)
                .map_err(|e| LlvmError::Codegen(format!("LoadField f64 load: {e}")))?;
            let cast_label = self.next_name("loadf_f64_bits");
            let as_bits = self
                .builder
                .build_bit_cast(as_double, self.ctx.i64_type(), &cast_label)
                .map_err(|e| LlvmError::Codegen(format!("LoadField f64 bitcast: {e}")))?
                .into_int_value();
            self.push(as_bits, IrType::F64);
        } else {
            let (llvm_ty, push_ty) = self.field_load_kind(ty)?;
            let label = self.next_name("loadf");
            let loaded = self
                .builder
                .build_load(llvm_ty, field_ptr, &label)
                .map_err(|e| LlvmError::Codegen(format!("LoadField load: {e}")))?
                .into_int_value();
            // Bool / Unit are i8 on the wire and i32 on the virtual stack;
            // I32 / I64 already have the right width.
            let value = if matches!(push_ty, IrType::Bool | IrType::Unit) {
                let label = self.next_name("loadf_zext");
                self.builder
                    .build_int_z_extend(loaded, self.ctx.i32_type(), &label)
                    .map_err(|e| LlvmError::Codegen(format!("LoadField zext: {e}")))?
            } else {
                loaded
            };
            self.push(value, push_ty);
        }
        Ok(())
    }

    /// Lower a `StoreField { offset, ty, inplace }`.
    ///
    /// The routing, in order:
    ///
    /// 1. `inplace` — pop the root pointer and stash it in
    ///    `self.inplace_return_root`; nothing is written to the buffer.
    /// 2. `String` / `ListInt` / `ListFloat` / `ListBool` — delegate to
    ///    `emit_store_field_pointer_indirect`.
    /// 3. `ListString` — delegate to `emit_store_field_list_string`.
    /// 4. `ListList` / `ListSchema` without `inplace` — rejected.
    /// 5. Phase D.1 fast path — store the popped i64 into the return slot.
    /// 6. Otherwise — store the popped value at
    ///    `arena_base + out_ptr + offset`, truncating `Bool` / `Unit` to i8.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` for an in-place store on a type that is
    /// not a pointer-array list, a second in-place store, a non-in-place
    /// `ListList` / `ListSchema`, a fast-path store that is not `I64` at
    /// `profile.ret_offset`, a missing arena base, an unsupported scalar
    /// type, or a builder failure.
    pub(crate) fn emit_store_field(
        &mut self,
        ip_hint: &str,
        offset: u32,
        ty: IrType,
        inplace: bool,
    ) -> Result<(), LlvmError> {
        if inplace {
            // In-place region-walk return ABI (S1/S2 `List<List<scalar>>`,
            // S3 `List<String>`, S4 `List<Schema>`), mirroring the cranelift
            // backend. IR lowering only sets `inplace` for a pointer-array
            // list sourced directly from a `#main` parameter identity, whose
            // root header lives in the input region and is self-contained
            // there. Instead of relocating that non-contiguous block, the
            // arena-relative root pointer pushed by `Op::LoadListListPtr` /
            // `Op::LoadListStringPtr` / `Op::LoadListSchemaPtr` is popped
            // and stashed; the buffer epilogue returns it as the negative
            // in-place sentinel. The fixed-area slot at `offset` is left
            // untouched because the host ignores `out_buf` for an in-place
            // return.
            let is_pointer_array = matches!(
                ty,
                IrType::ListList | IrType::ListString | IrType::ListSchema
            );
            if !is_pointer_array {
                // Any other type carrying the flag is a lowering bug.
                return Err(LlvmError::Codegen(format!(
                    "in-place StoreField on non-pointer-array type {ty:?} — lowering bug",
                )));
            }
            // One root return per `#main`, hence a single stash slot.
            if self.inplace_return_root.is_some() {
                return Err(LlvmError::Codegen(
                    "multiple in-place StoreField in one #main body — in-place return expects a \
                     single root value"
                        .into(),
                ));
            }
            let root_ptr = self.pop_int(ip_hint)?;
            self.inplace_return_root = Some(root_ptr);
            return Ok(());
        }

        // Pointer-indirect routing comes before the Phase D.1 fast-path
        // check because the fast path rejects every non-I64 store.
        match ty {
            // Phase E.1: bump-allocate in the output buffer's tail region,
            // memcpy the record there, and stamp the buffer-relative offset
            // into the fixed-area slot.
            IrType::String | IrType::ListInt | IrType::ListFloat | IrType::ListBool => {
                return self.emit_store_field_pointer_indirect(ip_hint, offset, ty);
            }
            // `List<String>` is pointer-indirect *and* pointer-array: its
            // header `[len][off_0..off_{N-1}]` holds arena-relative offsets
            // to per-entry String sub-records, so the whole block is copied
            // and each inner offset relocated into output coordinates.
            IrType::ListString => {
                return self.emit_store_field_list_string(ip_hint, offset);
            }
            // No copy producer exists for a non-in-place `List<List>` (it is
            // either the in-place param walk above or capped at lowering),
            // so reaching this arm is ABI drift.
            IrType::ListList => {
                return Err(LlvmError::Codegen(
                    "non-in-place StoreField { ty: ListList } has no copy path".into(),
                ));
            }
            // Same for `List<Schema>`: fail loudly rather than fall through
            // to the i64 store path.
            IrType::ListSchema => {
                return Err(LlvmError::Codegen(
                    "non-in-place StoreField { ty: ListSchema } has no copy path".into(),
                ));
            }
            _ => {}
        }

        // Phase D.1 fast path: the trailing StoreField becomes a store to
        // the i64 ret_slot. Only the single Int return slot is supported;
        // any other offset is outside the fast-path envelope.
        if let Some(fast) = self.fast_path.clone() {
            if !matches!(ty, IrType::I64) {
                return Err(LlvmError::Codegen(format!(
                    "fast-path StoreField: only I64 returns supported, got {ty:?}"
                )));
            }
            if offset != fast.profile.ret_offset {
                return Err(LlvmError::Codegen(format!(
                    "fast-path StoreField: offset {offset} != profile.ret_offset {}",
                    fast.profile.ret_offset
                )));
            }
            let result = self.pop_int(ip_hint)?;
            self.builder
                .build_store(fast.ret_slot, result)
                .map_err(|e| LlvmError::Codegen(format!("fast StoreField ret_slot: {e}")))?;
            return Ok(());
        }

        let arena_base = match self.arena_base_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(
                    "StoreField outside buffer-protocol entry shape".into(),
                ));
            }
        };
        let out_ptr = self.lookup_param(2)?; // IR LocalGet(2) == out_ptr
        let width = self.field_access_size(ty)?;
        let slot_ptr = self.compute_buffer_addr(arena_base, out_ptr, offset, width)?;
        let popped = self.pop_int(ip_hint)?;
        let payload: BasicValueEnum<'ctx> = match ty {
            // I32 / I64 are stored as-is. F64 rides the virtual stack as
            // bit-cast i64, so it is stored as an i64 too; the original
            // note says the bit-cast back happens on the host side.
            IrType::I64 | IrType::I32 | IrType::F64 => popped.into(),
            IrType::Bool | IrType::Unit => {
                // i32 on the stack, i8 on the wire.
                let label = self.next_name("storef_trunc");
                self.builder
                    .build_int_truncate(popped, self.ctx.i8_type(), &label)
                    .map_err(|e| LlvmError::Codegen(format!("StoreField trunc: {e}")))?
                    .into()
            }
            unsupported => {
                return Err(LlvmError::Codegen(format!(
                    "StoreField: Phase B envelope rejects {unsupported:?}"
                )));
            }
        };
        self.builder
            .build_store(slot_ptr, payload)
            .map_err(|e| LlvmError::Codegen(format!("StoreField store: {e}")))?;
        Ok(())
    }

    /// Compute `arena_base + buf_ptr + offset` as an LLVM pointer.
    ///
    /// Before the pointer is formed, a bounds check for the byte range
    /// `[buf_ptr + offset, buf_ptr + offset + access_size)` is emitted via
    /// `emit_arena_bounds_check_const`. The returned pointer is opaque and
    /// can be used for a `load` / `store` of any width.
    ///
    /// * `arena_base_ptr` — the cached arena base (an `i8*`).
    /// * `buf_ptr_i32` — i32 byte offset of the buffer inside the arena;
    ///   zero-extended, i.e. treated as unsigned (wasm semantics).
    /// * `offset` — compile-time field offset inside the buffer.
    /// * `access_size` — number of bytes the caller is about to access.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` when a builder call fails, and
    /// propagates any error from emitting the bounds check.
    pub(crate) fn compute_buffer_addr(
        &mut self,
        arena_base_ptr: PointerValue<'ctx>,
        buf_ptr_i32: IntValue<'ctx>,
        offset: u32,
        access_size: u32,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let i64_t = self.ctx.i64_type();
        let i8_t = self.ctx.i8_type();
        // Widen `buf_ptr_i32` to i64 (zero-extend — wasm semantics
        // treat the i32 as an unsigned byte offset).
        let name = self.next_name("buf_ptr_zext");
        let buf_ptr64 = self
            .builder
            .build_int_z_extend(buf_ptr_i32, i64_t, &name)
            .map_err(|e| LlvmError::Codegen(format!("buf_ptr zext: {e}")))?;
        // `offset` is known at compile time, so materialise it directly as
        // an i64 constant. This is the value a zext of the i32 constant
        // folds to, without a fallible builder call or a hard-coded
        // instruction name.
        let off64 = i64_t.const_int(u64::from(offset), false);
        let name = self.next_name("buf_off");
        let combined = self
            .builder
            .build_int_add(buf_ptr64, off64, &name)
            .map_err(|e| LlvmError::Codegen(format!("buf_ptr + offset: {e}")))?;
        self.emit_arena_bounds_check_const(combined, access_size, "buffer_addr")?;
        // GEP from the cached arena_base pointer (which is an i8*)
        // by the combined byte offset.
        let name = self.next_name("field_addr");
        // SAFETY: inkwell marks GEP construction `unsafe` because it cannot
        // validate the index. `combined` is the sum of two values that each
        // fit in 32 bits, so the i64 add cannot wrap, and the bounds check
        // emitted just above covers `[combined, combined + access_size)`.
        // NOTE(review): this relies on `emit_arena_bounds_check_const`
        // diverting control flow on failure — confirm in that helper.
        let addr = unsafe {
            self.builder
                .build_in_bounds_gep(i8_t, arena_base_ptr, &[combined], &name)
                .map_err(|e| LlvmError::Codegen(format!("field GEP: {e}")))?
        };
        Ok(addr)
    }

    /// Map a scalar field type to the LLVM type used to load it and the IR
    /// type tag to push afterwards.
    ///
    /// `I64`, `I32` and `F64` load at their natural LLVM type; `Bool` and
    /// `Unit` load as `i8`. The returned tag is always the input `ty`.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` for every other type.
    pub(crate) fn field_load_kind(
        &self,
        ty: IrType,
    ) -> Result<(inkwell::types::BasicTypeEnum<'ctx>, IrType), LlvmError> {
        let llvm_ty: inkwell::types::BasicTypeEnum<'ctx> = match ty {
            IrType::I64 => self.ctx.i64_type().into(),
            IrType::I32 => self.ctx.i32_type().into(),
            IrType::F64 => self.ctx.f64_type().into(),
            // Both are a single byte in the buffer.
            IrType::Bool | IrType::Unit => self.ctx.i8_type().into(),
            unsupported => {
                return Err(LlvmError::Codegen(format!(
                    "LoadField: Phase B envelope rejects {unsupported:?}"
                )));
            }
        };
        Ok((llvm_ty, ty))
    }

    /// Number of bytes a field of type `ty` occupies in its fixed-area
    /// slot: 1 for `Bool` / `Unit`, 8 for `I64` / `F64`, and 4 for `I32`
    /// and every remaining type (`String`, the `List*` family, `Closure`,
    /// `Dict`).
    ///
    /// The match is exhaustive on purpose, so adding an `IrType` variant
    /// forces a decision here. The function currently never returns `Err`.
    pub(crate) fn field_access_size(&self, ty: IrType) -> Result<u32, LlvmError> {
        let bytes = match ty {
            IrType::Bool | IrType::Unit => 1,
            IrType::I32
            | IrType::String
            | IrType::ListInt
            | IrType::ListFloat
            | IrType::ListBool
            | IrType::ListString
            | IrType::ListSchema
            | IrType::ListList
            | IrType::Closure
            | IrType::Dict => 4,
            IrType::I64 | IrType::F64 => 8,
        };
        Ok(bytes)
    }

    /// Lower `Op::ReadStringLen` (Phase 2 surface-widening).
    ///
    /// Stack effect: `[i32 record ptr] -> [i64 len]`. The popped value is
    /// an arena-relative pointer to a String or List* record header. The
    /// leading 4-byte length prefix is loaded through
    /// [`Self::arena_addr_i32_checked_const`] (4-byte access),
    /// zero-extended to i64, and pushed as `IrType::I64`.
    ///
    /// Mirrors `relon-codegen-cranelift::codegen::field::emit_read_string_len`.
    ///
    /// # Errors
    ///
    /// Propagates stack and address errors, and returns
    /// `LlvmError::Codegen` when a builder call fails.
    pub(crate) fn emit_read_string_len(&mut self, ip_hint: &str) -> Result<(), LlvmError> {
        let record_ptr = self.pop_int(ip_hint)?;
        let header_addr = self.arena_addr_i32_checked_const(record_ptr, 4, "ReadStringLen")?;

        let load_label = self.next_name("rs_len");
        let prefix = self
            .builder
            .build_load(self.ctx.i32_type(), header_addr, &load_label)
            .map_err(|e| LlvmError::Codegen(format!("ReadStringLen load: {e}")))?
            .into_int_value();

        let widen_label = self.next_name("rs_len64");
        let length = self
            .builder
            .build_int_z_extend(prefix, self.ctx.i64_type(), &widen_label)
            .map_err(|e| LlvmError::Codegen(format!("ReadStringLen zext: {e}")))?;

        self.push(length, IrType::I64);
        Ok(())
    }

    /// Lower `Op::LoadStringPtr` and its List* siblings (Phase 2
    /// surface-widening): `#main`-side String / List parameter loads.
    ///
    /// IR lowering emits this op whenever a String- or List-typed `#main`
    /// parameter is referenced. The buffer-protocol trampoline placed the
    /// parameter's 4-byte record pointer at `offset` bytes inside the input
    /// record, whose start is the `in_ptr` parameter (IR `LocalGet(0)`).
    /// This method loads that i32 from `arena_base + in_ptr + offset` and
    /// pushes it tagged with `ty`, so downstream ops (`ReadStringLen`,
    /// `Op::Call { contains }`, list-method dispatch) see the same shape
    /// they would for a freshly produced literal.
    ///
    /// The load is a plain i32, so it bypasses the zext / type-tagging
    /// logic in [`Self::field_load_kind`].
    ///
    /// **Coordinate system.** The loaded value is pushed unchanged; no
    /// `+ in_ptr` rebase is emitted here. Per the F1 note in the body, the
    /// input marshaller (`finish_arena_absolute`) is expected to have baked
    /// `in_ptr` into the slot already, so the value is the arena-relative
    /// offset that the uniform `arena_base + ptr` resolution in every
    /// consumer expects. NOTE(review): returning a list / string parameter
    /// by identity depends on this; `tests/aot_list_param_return.rs` is
    /// the regression test named for that case.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` when no arena base is available or the
    /// load cannot be built, and propagates parameter-lookup and
    /// address-computation errors.
    pub(crate) fn emit_load_pointer_indirect_param(
        &mut self,
        offset: u32,
        ty: IrType,
    ) -> Result<(), LlvmError> {
        let arena_base = match self.arena_base_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(format!(
                    "Op::Load*Ptr({ty:?}) outside buffer-protocol entry shape"
                )));
            }
        };
        let in_ptr = self.lookup_param(0)?; // IR LocalGet(0) == in_ptr
        let slot_addr = self.compute_buffer_addr(arena_base, in_ptr, offset, 4)?;
        // F1: the marshaller already baked `in_ptr` into the slot
        // (`finish_arena_absolute`), so the value is pushed as loaded.
        let label = self.next_name("loadptr_arena_rel");
        let record_ptr = self
            .builder
            .build_load(self.ctx.i32_type(), slot_addr, &label)
            .map_err(|e| LlvmError::Codegen(format!("Load*Ptr load: {e}")))?
            .into_int_value();
        self.push(record_ptr, ty);
        Ok(())
    }

    /// Compute `align_up(value + add, align)` as an i32 value.
    ///
    /// Used by the pointer-indirect record copy to resolve a record's inner
    /// payload position (`align_up(record_start + 4, align)`) from either
    /// the source or the destination record start.
    ///
    /// * `value` — runtime i32 value to offset and round.
    /// * `add` — compile-time constant added before rounding.
    /// * `align` — compile-time alignment. Must be a power of two (record
    ///   alignments are 4 / 8). For `align <= 1` the rounding is a no-op
    ///   and the result is `value + add`.
    /// * `label` — prefix for the emitted instruction names and for error
    ///   messages.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` when `align` is greater than 1 and not
    /// a power of two (the mask trick below would silently round to the
    /// wrong boundary), or when a builder call fails.
    pub(crate) fn align_up_const(
        &mut self,
        value: IntValue<'ctx>,
        add: u32,
        align: u32,
        label: &str,
    ) -> Result<IntValue<'ctx>, LlvmError> {
        // Validate before emitting anything: `(x + align - 1) & !(align - 1)`
        // is only a round-up when `align` is a power of two.
        if align > 1 && !align.is_power_of_two() {
            return Err(LlvmError::Codegen(format!(
                "{label}: alignment {align} is not a power of two"
            )));
        }
        let i32_t = self.ctx.i32_type();
        let summed = self
            .builder
            .build_int_add(
                value,
                i32_t.const_int(u64::from(add), false),
                &format!("{label}_sum"),
            )
            .map_err(|e| LlvmError::Codegen(format!("{label} add: {e}")))?;
        if align <= 1 {
            return Ok(summed);
        }
        let bumped = self
            .builder
            .build_int_add(
                summed,
                i32_t.const_int(u64::from(align - 1), false),
                &format!("{label}_bump"),
            )
            .map_err(|e| LlvmError::Codegen(format!("{label} align bump: {e}")))?;
        // `!(align - 1)` is computed in u32, so the mask is exactly 32 bits
        // wide before being widened for `const_int`.
        let mask = i32_t.const_int(u64::from(!(align - 1)), false);
        self.builder
            .build_and(bumped, mask, &format!("{label}_align"))
            .map_err(|e| LlvmError::Codegen(format!("{label} align and: {e}")))
    }

    /// Resolve `arena_base + off_i32` for an access of a compile-time
    /// known size, after the accessed byte range has been checked
    /// against `ArenaState::arena_len`.
    ///
    /// Thin wrapper: materialises `access_size` as an i32 constant and
    /// forwards to [`Self::arena_addr_i32_checked`].
    pub(crate) fn arena_addr_i32_checked_const(
        &mut self,
        off_i32: IntValue<'ctx>,
        access_size: u32,
        label: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let i32_ty = self.ctx.i32_type();
        let access_len = i32_ty.const_int(u64::from(access_size), false);
        self.arena_addr_i32_checked(off_i32, access_len, label)
    }

    /// Emit a bounds check for `[off_i32, off_i32 + access_len)` and
    /// then resolve `arena_base + off_i32` to a pointer.
    ///
    /// # Errors
    /// Fails when no arena base pointer is available, or when emitting
    /// the check or the address computation fails.
    pub(crate) fn arena_addr_i32_checked(
        &mut self,
        off_i32: IntValue<'ctx>,
        access_len: IntValue<'ctx>,
        label: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let base_ptr = match self.arena_base_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(
                    "absolute load/store outside buffer-protocol entry shape".into(),
                ));
            }
        };
        // The check is emitted first so the GEP only runs on the
        // in-bounds path.
        self.emit_arena_bounds_check(off_i32, access_len, label)?;
        self.arena_addr_from_base_offset(base_ptr, off_i32, "abs")
    }

    /// Constant-size convenience over
    /// [`Self::arena_addr_i32_offset_checked`]: turns `access_size` into
    /// an i32 constant and forwards everything else unchanged.
    pub(crate) fn arena_addr_i32_offset_checked_const(
        &mut self,
        base: IntValue<'ctx>,
        offset: u32,
        access_size: u32,
        label: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let i32_ty = self.ctx.i32_type();
        let access_len = i32_ty.const_int(u64::from(access_size), false);
        self.arena_addr_i32_offset_checked(base, offset, access_len, label)
    }

    /// Resolve `arena_base + (base + offset)` after bounds-checking the
    /// `access_len`-byte range that starts there.
    ///
    /// `base` is widened to i64 *before* the constant `offset` is added,
    /// so the sum is formed in 64 bits and the bounds check sees the
    /// un-wrapped value.
    ///
    /// # Errors
    /// Fails when no arena base pointer is available or any emission
    /// step fails.
    pub(crate) fn arena_addr_i32_offset_checked(
        &mut self,
        base: IntValue<'ctx>,
        offset: u32,
        access_len: IntValue<'ctx>,
        label: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let base_ptr = match self.arena_base_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(
                    "absolute load/store outside buffer-protocol entry shape".into(),
                ));
            }
        };
        let wide_base = self.zext_to_i64(base, label, "base")?;
        let wide_offset = self.ctx.i64_type().const_int(u64::from(offset), false);
        let sum_name = self.next_name("abs_offset_compose");
        let absolute = self
            .builder
            .build_int_add(wide_base, wide_offset, &sum_name)
            .map_err(|e| LlvmError::Codegen(format!("{label} offset compose add: {e}")))?;
        self.emit_arena_bounds_check(absolute, access_len, label)?;
        self.arena_addr_from_base_offset(base_ptr, absolute, "abs")
    }

    /// Emit an `i8` in-bounds GEP computing `arena_base_ptr + offset`.
    ///
    /// `offset` is zero-extended to i64 first. This helper performs no
    /// bounds check of its own; `prefix` seeds the generated value name.
    pub(crate) fn arena_addr_from_base_offset(
        &mut self,
        arena_base_ptr: PointerValue<'ctx>,
        offset: IntValue<'ctx>,
        prefix: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let wide_off = self.zext_to_i64(offset, prefix, "off")?;
        let gep_name = self.next_name(&format!("{prefix}_addr"));
        let byte_ty = self.ctx.i8_type();
        // SAFETY: NOTE(review) — a single i64 index over an `i8` element
        // type is a well-formed GEP; the checked callers in this file
        // emit the arena bounds check before reaching here. Confirm for
        // any other caller.
        let gep = unsafe {
            self.builder
                .build_in_bounds_gep(byte_ty, arena_base_ptr, &[wide_off], &gep_name)
        };
        gep.map_err(|e| LlvmError::Codegen(format!("abs GEP: {e}")))
    }

    /// Emit an arena bounds check for an access whose size is known at
    /// codegen time. Wraps `access_size` in an i32 constant and defers
    /// to `emit_arena_bounds_check`.
    pub(crate) fn emit_arena_bounds_check_const(
        &mut self,
        offset: IntValue<'ctx>,
        access_size: u32,
        label: &str,
    ) -> Result<(), LlvmError> {
        let i32_ty = self.ctx.i32_type();
        let access_len = i32_ty.const_int(u64::from(access_size), false);
        self.emit_arena_bounds_check(offset, access_len, label)
    }

    /// Emit `if offset + access_len > arena_len { trap }` and leave the
    /// builder positioned on the in-bounds continuation block.
    ///
    /// `arena_len` is read as an i32 from the state struct at
    /// `ARENA_STATE_OFFSET_LEN`. All three quantities are zero-extended
    /// to i64 before the add and the unsigned compare, so the sum is
    /// formed in 64 bits.
    ///
    /// On the out-of-bounds path the `Buffer` entry shape reports
    /// `NativeTrap::BoundsViolation` through `emit_state_trap`; every
    /// other shape emits the LLVM trap call followed by `unreachable`.
    ///
    /// # Errors
    /// Fails when no state pointer is available or any emission step
    /// fails.
    fn emit_arena_bounds_check(
        &mut self,
        offset: IntValue<'ctx>,
        access_len: IntValue<'ctx>,
        label: &str,
    ) -> Result<(), LlvmError> {
        let state_ptr = match self.state_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(format!(
                    "{label}: bounds check requires state ptr"
                )));
            }
        };
        let byte_ty = self.ctx.i8_type();
        let i32_ty = self.ctx.i32_type();
        let i64_ty = self.ctx.i64_type();

        // Fetch the current arena length out of the state struct.
        let len_field = i32_ty.const_int(u64::from(ARENA_STATE_OFFSET_LEN), false);
        let len_gep_name = self.next_name("bounds_len_gep");
        let len_slot = unsafe {
            self.builder
                .build_in_bounds_gep(byte_ty, state_ptr, &[len_field], &len_gep_name)
        }
        .map_err(|e| LlvmError::Codegen(format!("{label} bounds len GEP: {e}")))?;
        let len_name = self.next_name("bounds_arena_len");
        let arena_len = self
            .builder
            .build_load(i32_ty, len_slot, &len_name)
            .map_err(|e| LlvmError::Codegen(format!("{label} bounds len load: {e}")))?
            .into_int_value();
        let wide_len_name = self.next_name("bounds_len64");
        let arena_len_wide = self
            .builder
            .build_int_z_extend(arena_len, i64_ty, &wide_len_name)
            .map_err(|e| LlvmError::Codegen(format!("{label} bounds len zext: {e}")))?;

        // One-past-the-end of the access, computed in 64 bits.
        let offset_wide = self.zext_to_i64(offset, label, "off")?;
        let access_wide = self.zext_to_i64(access_len, label, "access")?;
        let end_name = self.next_name("bounds_end");
        let range_end = self
            .builder
            .build_int_add(offset_wide, access_wide, &end_name)
            .map_err(|e| LlvmError::Codegen(format!("{label} bounds end add: {e}")))?;
        let cmp_name = self.next_name("bounds_oob");
        let past_end = self
            .builder
            .build_int_compare(
                inkwell::IntPredicate::UGT,
                range_end,
                arena_len_wide,
                &cmp_name,
            )
            .map_err(|e| LlvmError::Codegen(format!("{label} bounds cmp: {e}")))?;

        let trap_block = self.ctx.append_basic_block(self.func, "bounds_trap");
        let ok_block = self.ctx.append_basic_block(self.func, "bounds_ok");
        self.builder
            .build_conditional_branch(past_end, trap_block, ok_block)
            .map_err(|e| LlvmError::Codegen(format!("{label} bounds branch: {e}")))?;

        // Out-of-bounds path.
        self.builder.position_at_end(trap_block);
        if self.shape == EntryShape::Buffer {
            // NOTE(review): `emit_state_trap` presumably terminates this
            // block itself — confirm against its definition.
            self.emit_state_trap(NativeTrap::BoundsViolation, label)?;
        } else {
            self.emit_llvm_trap_call(label)?;
            self.builder
                .build_unreachable()
                .map_err(|e| LlvmError::Codegen(format!("{label} bounds unreachable: {e}")))?;
        }

        // Subsequent code is emitted on the in-bounds path.
        self.builder.position_at_end(ok_block);
        Ok(())
    }

    /// Zero-extend `value` to i64.
    ///
    /// A value that is already 64 bits wide is returned untouched (no
    /// instruction emitted); anything wider than 64 bits is rejected.
    /// `label` and `name` only feed the value name and error text.
    fn zext_to_i64(
        &mut self,
        value: IntValue<'ctx>,
        label: &str,
        name: &str,
    ) -> Result<IntValue<'ctx>, LlvmError> {
        let width = value.get_type().get_bit_width();
        match width {
            64 => Ok(value),
            w if w > 64 => Err(LlvmError::Codegen(format!(
                "{label} bounds {name}: i{width} is wider than i64"
            ))),
            _ => {
                let wide_name = self.next_name(&format!("bounds_{name}64"));
                self.builder
                    .build_int_z_extend(value, self.ctx.i64_type(), &wide_name)
                    .map_err(|e| LlvmError::Codegen(format!("{label} bounds {name} zext: {e}")))
            }
        }
    }

    /// Build the bounds-checked pointer that a `Load*AtAbsolute` /
    /// `Store*AtAbsolute` op dereferences: `arena_base + base + offset`.
    ///
    /// The callee widens `base` to i64 before adding the constant, so a
    /// hostile or corrupt base near `u32::MAX` cannot wrap around in i32
    /// and then slip through the bounds check looking like a small
    /// offset.
    pub(crate) fn compose_abs_addr(
        &mut self,
        base: IntValue<'ctx>,
        offset: u32,
        access_size: u32,
        label: &str,
    ) -> Result<PointerValue<'ctx>, LlvmError> {
        let access_len = self.ctx.i32_type().const_int(u64::from(access_size), false);
        self.arena_addr_i32_offset_checked(base, offset, access_len, label)
    }

    /// Lower a `Load*AtAbsolute` op: pop the base offset, resolve the
    /// bounds-checked address `base + offset`, load a value of the
    /// width selected by `kind`, and push it on the virtual stack.
    ///
    /// `I8U` loads one byte and zero-extends it to i32; `F64` loads a
    /// double and pushes its i64 bit pattern tagged as `IrType::F64`.
    pub(crate) fn emit_load_at_absolute(
        &mut self,
        ip_hint: &str,
        offset: u32,
        kind: AbsLoad,
    ) -> Result<(), LlvmError> {
        let base = self.pop_int(ip_hint)?;
        let width_bytes = match kind {
            AbsLoad::I8U => 1,
            AbsLoad::I32 => 4,
            AbsLoad::I64 | AbsLoad::F64 => 8,
        };
        let src = self.compose_abs_addr(base, offset, width_bytes, "LoadAtAbsolute")?;
        let (loaded, stack_ty) = match kind {
            AbsLoad::I32 => {
                let load_name = self.next_name("loadi32_abs");
                let word = self
                    .builder
                    .build_load(self.ctx.i32_type(), src, &load_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadI32AtAbsolute: {e}")))?
                    .into_int_value();
                (word, IrType::I32)
            }
            AbsLoad::I64 => {
                let load_name = self.next_name("loadi64_abs");
                let quad = self
                    .builder
                    .build_load(self.ctx.i64_type(), src, &load_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadI64AtAbsolute: {e}")))?
                    .into_int_value();
                (quad, IrType::I64)
            }
            AbsLoad::I8U => {
                let load_name = self.next_name("loadi8u_abs");
                let byte = self
                    .builder
                    .build_load(self.ctx.i8_type(), src, &load_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadI8UAtAbsolute: {e}")))?
                    .into_int_value();
                // Unsigned byte: widen with zero-extension to the i32
                // the stack carries.
                let widen_name = self.next_name("loadi8u_zext");
                let widened = self
                    .builder
                    .build_int_z_extend(byte, self.ctx.i32_type(), &widen_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadI8UAtAbsolute zext: {e}")))?;
                (widened, IrType::I32)
            }
            AbsLoad::F64 => {
                // Float ops sit outside the current W3/W4 envelope, but
                // the load is still accepted so the dispatcher stays
                // exhaustive.
                let load_name = self.next_name("loadf64_abs");
                let float = self
                    .builder
                    .build_load(self.ctx.f64_type(), src, &load_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadF64AtAbsolute: {e}")))?;
                // The virtual stack is int-typed, so carry the raw bits.
                let int_ty = self.ctx.i64_type();
                let cast_name = self.next_name("loadf64_bitcast");
                let bits = self
                    .builder
                    .build_bit_cast(float, int_ty, &cast_name)
                    .map_err(|e| LlvmError::Codegen(format!("LoadF64 bitcast: {e}")))?
                    .into_int_value();
                (bits, IrType::F64)
            }
        };
        self.push(loaded, stack_ty);
        Ok(())
    }

    /// Lower a `Store*AtAbsolute` op: pop the value and then the base
    /// offset, resolve the bounds-checked address `base + offset`, and
    /// store the value with the width selected by `kind`.
    ///
    /// `I8` truncates the popped value to one byte; `F64` bit-casts the
    /// popped i64 back to a double before storing.
    pub(crate) fn emit_store_at_absolute(
        &mut self,
        ip_hint: &str,
        offset: u32,
        kind: AbsStore,
    ) -> Result<(), LlvmError> {
        // The stack holds `[base, value]` with the value on top, which
        // matches the pop order used by the cranelift backend.
        let value = self.pop_int(ip_hint)?;
        let base = self.pop_int(ip_hint)?;
        let width_bytes = match kind {
            AbsStore::I8 => 1,
            AbsStore::I32 => 4,
            AbsStore::I64 | AbsStore::F64 => 8,
        };
        let dst = self.compose_abs_addr(base, offset, width_bytes, "StoreAtAbsolute")?;
        let stored = match kind {
            AbsStore::I32 => self
                .builder
                .build_store(dst, value)
                .map_err(|e| LlvmError::Codegen(format!("StoreI32AtAbsolute: {e}"))),
            AbsStore::I64 => self
                .builder
                .build_store(dst, value)
                .map_err(|e| LlvmError::Codegen(format!("StoreI64AtAbsolute: {e}"))),
            AbsStore::I8 => {
                // Only the low byte of the stack value is written.
                let trunc_name = self.next_name("storei8_trunc");
                let byte = self
                    .builder
                    .build_int_truncate(value, self.ctx.i8_type(), &trunc_name)
                    .map_err(|e| LlvmError::Codegen(format!("StoreI8AtAbsolute trunc: {e}")))?;
                self.builder
                    .build_store(dst, byte)
                    .map_err(|e| LlvmError::Codegen(format!("StoreI8AtAbsolute: {e}")))
            }
            AbsStore::F64 => {
                // f64 travels on the virtual stack as its i64 bit
                // pattern; cast it back so the stored bytes follow the
                // wasm f64 wire layout.
                let cast_name = self.next_name("storef64_bitcast");
                let float = self
                    .builder
                    .build_bit_cast(value, self.ctx.f64_type(), &cast_name)
                    .map_err(|e| LlvmError::Codegen(format!("StoreF64 bitcast: {e}")))?;
                self.builder
                    .build_store(dst, float)
                    .map_err(|e| LlvmError::Codegen(format!("StoreF64AtAbsolute: {e}")))
            }
        };
        stored.map(|_| ())
    }

    /// Lower `Op::MemcpyAtAbsolute`, whose operands sit on the stack as
    /// `[dst, src, len]`. Both offsets are bounds-checked for `len`
    /// bytes and resolved through `arena_base`, then handed to the
    /// builder's memcpy (LLVM's `llvm.memcpy.p0.p0.i64` intrinsic).
    pub(crate) fn emit_memcpy_at_absolute(&mut self, ip_hint: &str) -> Result<(), LlvmError> {
        // Operands come off in reverse push order.
        let byte_count = self.pop_int(ip_hint)?;
        let src_off = self.pop_int(ip_hint)?;
        let dst_off = self.pop_int(ip_hint)?;
        let dst_addr = self.arena_addr_i32_checked(dst_off, byte_count, "MemcpyAtAbsolute dst")?;
        let src_addr = self.arena_addr_i32_checked(src_off, byte_count, "MemcpyAtAbsolute src")?;
        // `build_memcpy` wants a pointer-width length, so the i32 count
        // is zero-extended to i64.
        let wide_count = self
            .builder
            .build_int_z_extend(byte_count, self.ctx.i64_type(), "memcpy_len_zext")
            .map_err(|e| LlvmError::Codegen(format!("memcpy len zext: {e}")))?;
        // Alignment hint of 1 on both sides: inner records are not
        // guaranteed to be more than byte-aligned (string headers land
        // on 4-byte boundaries, but the payload follows immediately).
        // The optimiser can tighten this when it can prove more.
        self.builder
            .build_memcpy(dst_addr, 1, src_addr, 1, wide_count)
            .map(|_| ())
            .map_err(|e| LlvmError::Codegen(format!("MemcpyAtAbsolute build: {e}")))
    }

    /// Bump-allocate `size_v` (i32) bytes from the arena's scratch
    /// region and push the allocation's arena-relative i32 offset
    /// (`scratch_base + cursor` as it was before the bump) onto the
    /// virtual stack — the same shape as cranelift's
    /// `emit_alloc_scratch`.
    ///
    /// The new cursor is written back only after the range
    /// `[offset, offset + size_v)` passes the arena bounds check.
    ///
    /// # Errors
    /// Fails when no state pointer is available or any emission step
    /// fails.
    pub(crate) fn emit_alloc_scratch_common(
        &mut self,
        size_v: IntValue<'ctx>,
    ) -> Result<(), LlvmError> {
        let state_ptr = match self.state_ptr {
            Some(ptr) => ptr,
            None => {
                return Err(LlvmError::Codegen(
                    "AllocScratch outside buffer-protocol entry shape (no state ptr)".into(),
                ));
            }
        };
        let byte_ty = self.ctx.i8_type();
        let i32_ty = self.ctx.i32_type();

        // The state fields are addressed with hand-rolled byte-offset
        // GEPs because the inkwell wrappers expect a struct field
        // accessor.
        let cursor_field = i32_ty.const_int(u64::from(ARENA_STATE_OFFSET_SCRATCH_CURSOR), false);
        let cursor_slot = unsafe {
            self.builder
                .build_in_bounds_gep(byte_ty, state_ptr, &[cursor_field], "scratch_cursor_gep")
        }
        .map_err(|e| LlvmError::Codegen(format!("scratch_cursor GEP: {e}")))?;
        let cursor = self
            .builder
            .build_load(i32_ty, cursor_slot, "scratch_cursor")
            .map_err(|e| LlvmError::Codegen(format!("scratch_cursor load: {e}")))?
            .into_int_value();

        let base_field = i32_ty.const_int(u64::from(ARENA_STATE_OFFSET_SCRATCH_BASE), false);
        let base_slot = unsafe {
            self.builder
                .build_in_bounds_gep(byte_ty, state_ptr, &[base_field], "scratch_base_gep")
        }
        .map_err(|e| LlvmError::Codegen(format!("scratch_base GEP: {e}")))?;
        let region_base = self
            .builder
            .build_load(i32_ty, base_slot, "scratch_base")
            .map_err(|e| LlvmError::Codegen(format!("scratch_base load: {e}")))?
            .into_int_value();

        // Offset handed back to the program: scratch_base + cursor.
        let alloc_off = self
            .builder
            .build_int_add(region_base, cursor, "scratch_off")
            .map_err(|e| LlvmError::Codegen(format!("scratch off add: {e}")))?;
        // Cursor after the allocation: cursor + size.
        let bumped = self
            .builder
            .build_int_add(cursor, size_v, "scratch_new_cur")
            .map_err(|e| LlvmError::Codegen(format!("scratch cur bump: {e}")))?;
        // Trap before committing the bump if the block would not fit.
        self.emit_arena_bounds_check(alloc_off, size_v, "AllocScratch")?;
        self.builder
            .build_store(cursor_slot, bumped)
            .map_err(|e| LlvmError::Codegen(format!("scratch cursor store: {e}")))?;
        self.push(alloc_off, IrType::I32);
        Ok(())
    }

    /// Scratch allocation with a size known at codegen time: wraps
    /// `size_bytes` in an i32 constant and defers to
    /// `emit_alloc_scratch_common`.
    pub(crate) fn emit_alloc_scratch_static(&mut self, size_bytes: u32) -> Result<(), LlvmError> {
        let i32_ty = self.ctx.i32_type();
        let byte_count = i32_ty.const_int(u64::from(size_bytes), false);
        self.emit_alloc_scratch_common(byte_count)
    }

    /// Scratch allocation with a runtime size: the byte count is popped
    /// from the virtual stack and passed to `emit_alloc_scratch_common`.
    pub(crate) fn emit_alloc_scratch_dyn(&mut self, ip_hint: &str) -> Result<(), LlvmError> {
        let byte_count = self.pop_int(ip_hint)?;
        self.emit_alloc_scratch_common(byte_count)
    }

    /// Lower `Op::StoreField { ty }` for pointer-indirect types
    /// (`String`, `ListInt`, `ListFloat`, `ListBool`). Pops the source
    /// arena offset, copies the `[len:u32 LE][payload]` record into
    /// the output buffer's tail region (`out_ptr + tail_cursor`),
    /// writes `tail_cursor` (buffer-relative offset of the new record)
    /// into the fixed-area slot at `offset`, and bumps `tail_cursor`.
    /// Mirrors cranelift's `emit_store_pointer_indirect`.
    pub(crate) fn emit_store_field_pointer_indirect(
        &mut self,
        ip_hint: &str,
        offset: u32,
        ty: IrType,
    ) -> Result<(), LlvmError> {
        let arena_base_ptr = self.arena_base_ptr.ok_or_else(|| {
            LlvmError::Codegen("StoreField (pointer-indirect) outside buffer entry".into())
        })?;
        let state_ptr = self.state_ptr.ok_or_else(|| {
            LlvmError::Codegen("StoreField (pointer-indirect): missing state ptr".into())
        })?;
        let src_off_i32 = self.pop_int(ip_hint)?;
        let i32_t = self.ctx.i32_type();
        let i8_t = self.ctx.i8_type();
        // Read the record's `[len: u32]` header to size the memcpy.
        let src_abs =
            self.arena_addr_i32_checked_const(src_off_i32, 4, "StoreField ptr src len")?;
        let len_i32 = self
            .builder
            .build_load(i32_t, src_abs, "ptr_indirect_len")
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect len load: {e}")))?
            .into_int_value();
        let record_size = match ty {
            IrType::String => {
                let four = i32_t.const_int(4, false);
                self.builder
                    .build_int_add(len_i32, four, "string_recsize")
                    .map_err(|e| LlvmError::Codegen(format!("String record_size: {e}")))?
            }
            IrType::ListInt | IrType::ListFloat => {
                // record_size = 8 + 8 * element_count.
                let three = i32_t.const_int(3, false);
                let shifted = self
                    .builder
                    .build_left_shift(len_i32, three, "list_shl")
                    .map_err(|e| LlvmError::Codegen(format!("list shl: {e}")))?;
                let eight = i32_t.const_int(8, false);
                self.builder
                    .build_int_add(shifted, eight, "list_recsize")
                    .map_err(|e| LlvmError::Codegen(format!("list record_size: {e}")))?
            }
            IrType::ListBool => {
                let four = i32_t.const_int(4, false);
                self.builder
                    .build_int_add(len_i32, four, "listbool_recsize")
                    .map_err(|e| LlvmError::Codegen(format!("listbool record_size: {e}")))?
            }
            _ => {
                return Err(LlvmError::Codegen(format!(
                    "emit_store_field_pointer_indirect: unsupported {ty:?}"
                )));
            }
        };
        // Pick the alignment for the tail bump. String / ListBool stay
        // 4-aligned (the leading u32 length); ListInt / ListFloat need
        // 8 so the i64 / f64 payload that follows is aligned.
        let align: u32 = match ty {
            IrType::String | IrType::ListBool => 4,
            IrType::ListInt | IrType::ListFloat => 8,
            _ => unreachable!(),
        };
        // Tail bump: aligned = align_up(cur, align); new_cur = aligned + record_size.
        let tail_gep = unsafe {
            self.builder
                .build_in_bounds_gep(
                    i8_t,
                    state_ptr,
                    &[i32_t.const_int(u64::from(ARENA_STATE_OFFSET_TAIL_CURSOR), false)],
                    "tail_cursor_gep",
                )
                .map_err(|e| LlvmError::Codegen(format!("tail_cursor GEP: {e}")))?
        };
        let cur = self
            .builder
            .build_load(i32_t, tail_gep, "tail_cursor_pre")
            .map_err(|e| LlvmError::Codegen(format!("tail_cursor load: {e}")))?
            .into_int_value();
        let aligned = if align <= 1 {
            cur
        } else {
            let add = i32_t.const_int(u64::from(align - 1), false);
            let mask_val = !(align - 1);
            let mask = i32_t.const_int(u64::from(mask_val), false);
            let sum = self
                .builder
                .build_int_add(cur, add, "tail_align_sum")
                .map_err(|e| LlvmError::Codegen(format!("tail align add: {e}")))?;
            self.builder
                .build_and(sum, mask, "tail_align_and")
                .map_err(|e| LlvmError::Codegen(format!("tail align and: {e}")))?
        };
        let new_cur = self
            .builder
            .build_int_add(aligned, record_size, "tail_cursor_post")
            .map_err(|e| LlvmError::Codegen(format!("tail cur bump: {e}")))?;
        self.builder
            .build_store(tail_gep, new_cur)
            .map_err(|e| LlvmError::Codegen(format!("tail cursor store: {e}")))?;
        // Write the destination record at `out_ptr + aligned`.
        //
        // The record's *inner* padding is position-dependent: the host
        // protocol lays the payload at `align_up(record_start + 4,
        // align)`, so the gap between the 4-byte `[len]` header and the
        // payload is `align_up(record_start + 4, align) - record_start -
        // 4` — which differs between the source record (whatever offset
        // the input marshaller / const-pool put it at) and the freshly-
        // aligned destination slot. A verbatim `memcpy(record_size)` from
        // the source therefore drags the *source's* pad geometry into the
        // destination and misaligns the payload whenever the two record
        // starts have different `% align` residues (e.g. a `List<Int>`
        // input arg lands its record 4-aligned-but-not-8, payload at
        // header+4; the 8-aligned output slot expects payload at
        // header+8). So copy the `[len]` header and the payload
        // *separately*, reading the payload from the source's actual
        // payload position and writing it to the destination's — the pad
        // is recomputed on each side rather than copied.
        let out_ptr_i32 = self.lookup_param(2)?; // IR LocalGet(2) == out_ptr
        let dst_off = self
            .builder
            .build_int_add(out_ptr_i32, aligned, "ptr_indirect_dst_off")
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect dst off: {e}")))?;
        let dst_ptr = self.arena_addr_i32_checked_const(dst_off, 4, "StoreField ptr dst len")?;
        let i64_t = self.ctx.i64_type();
        let _ = arena_base_ptr;
        // Header: store the `[len: u32]` prefix at the destination record
        // start (`dst_off + 0`).
        self.builder
            .build_store(dst_ptr, len_i32)
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect len store: {e}")))?;
        // Payload byte count: String / ListBool are 1 byte/element,
        // ListInt / ListFloat are 8.
        let payload_bytes = match ty {
            IrType::String | IrType::ListBool => len_i32,
            IrType::ListInt | IrType::ListFloat => self
                .builder
                .build_left_shift(len_i32, i32_t.const_int(3, false), "payload_shl")
                .map_err(|e| LlvmError::Codegen(format!("payload shl: {e}")))?,
            _ => unreachable!("record_size match already rejected other types"),
        };
        // Source payload offset = align_up(src_off + 4, align). Recompute
        // it from the (arena-relative) source record start rather than
        // assuming a fixed header+pad — see the comment above.
        let src_payload_off = self.align_up_const(src_off_i32, 4, align, "src_payload")?;
        let src_payload_ptr =
            self.arena_addr_i32_checked(src_payload_off, payload_bytes, "StoreField ptr src")?;
        // Destination payload offset = align_up(dst_off + 4, align).
        let dst_payload_off = self.align_up_const(dst_off, 4, align, "dst_payload")?;
        let dst_payload_ptr =
            self.arena_addr_i32_checked(dst_payload_off, payload_bytes, "StoreField ptr dst")?;
        let payload64 = self
            .builder
            .build_int_z_extend(payload_bytes, i64_t, "ptr_indirect_payload64")
            .map_err(|e| LlvmError::Codegen(format!("payload64 zext: {e}")))?;
        self.builder
            .build_memcpy(dst_payload_ptr, align, src_payload_ptr, 1, payload64)
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect payload memcpy: {e}")))?;
        // Store the record's **arena-absolute** offset (`dst_off =
        // out_ptr + aligned`, the F1 slot convention) into the fixed-area
        // slot at `out_ptr + offset`.
        let slot_off = self
            .builder
            .build_int_add(
                out_ptr_i32,
                i32_t.const_int(u64::from(offset), false),
                "ptr_indirect_slot_off",
            )
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect slot off: {e}")))?;
        let slot_addr =
            self.arena_addr_i32_checked_const(slot_off, 4, "StoreField ptr fixed slot")?;
        self.builder
            .build_store(slot_addr, dst_off)
            .map_err(|e| LlvmError::Codegen(format!("ptr-indirect slot store: {e}")))?;
        // Flag the body so the buffer-protocol epilogue returns the
        // post-bump tail cursor.
        self.needs_tail_cursor = true;
        Ok(())
    }

    /// Lower `Op::StoreField { ty: ListString }` — the pointer-*array*
    /// marshalling path.
    ///
    /// The source record (materialised by `Op::ConstListString`) is one
    /// contiguous arena block:
    ///
    /// ```text
    ///   [str_0 record][str_1 record]...[str_{N-1} record][header]
    /// ```
    ///
    /// Each `str_i` record is `[slen: u32][utf8]` (4-aligned) and the
    /// header is `[len: u32][off_0: u32]...[off_{N-1}: u32]`
    /// (4-aligned), where every `off_i` is an *arena-relative* byte
    /// offset to `str_i`. The String records precede the header, so the
    /// block spans `[off_0, header_end)` with `off_0` the lowest offset.
    ///
    /// Since the block moves rigidly into the output buffer's tail, one
    /// delta relocates every inner pointer:
    ///
    /// ```text
    ///   delta      = dst_block - src_block_start_arena
    ///   new_off_i  = off_i + delta
    ///   new_header = header_off + delta
    /// ```
    ///
    /// `delta` is a multiple of 4 (both endpoints are 4-aligned), so
    /// the rigid copy keeps every inner `[slen][utf8]` record 4-byte
    /// aligned. The copy and the offset rewrite are delegated to
    /// `copy_list_string_block`; this function only pops the source
    /// header offset and stamps the relocated header offset into the
    /// fixed-area slot at `out_ptr + offset`, which is what the host
    /// `BufferReader::read_list_string` walks.
    ///
    /// NOTE(review): the coordinate space of the relocated offsets is
    /// decided inside `copy_list_string_block`; the store site here
    /// treats the returned header offset as arena-absolute (F1 slot
    /// convention) — confirm against that helper.
    pub(crate) fn emit_store_field_list_string(
        &mut self,
        ip_hint: &str,
        offset: u32,
    ) -> Result<(), LlvmError> {
        let src_header = self.pop_int(ip_hint)?;
        let relocated_header = self.copy_list_string_block(src_header)?;
        // The relocated header offset is arena-absolute (F1); it goes
        // into the fixed-area slot at out_ptr + offset.
        let i32_ty = self.ctx.i32_type();
        let out_base = self.lookup_param(2)?;
        let field_delta = i32_ty.const_int(u64::from(offset), false);
        let slot_off = self
            .builder
            .build_int_add(out_base, field_delta, "ls_slot_off")
            .map_err(|e| LlvmError::Codegen(format!("ListString slot off: {e}")))?;
        let slot_ptr =
            self.arena_addr_i32_checked_const(slot_off, 4, "StoreField ListString slot")?;
        self.builder
            .build_store(slot_ptr, relocated_header)
            .map(|_| ())
            .map_err(|e| LlvmError::Codegen(format!("ListString slot store: {e}")))
    }

    /// Copy a `List<String>` pointer-array block referenced by the
    /// arena-relative `header_off` into the output buffer's tail and
    /// relocate every inner offset, returning the offset of the copied
    /// header. Shared by the top-level `StoreField { ty: ListString }`
    /// path and the `EmitTailRecordFromAbsoluteAddr { ty: ListString }`
    /// (record-field) path. Mirrors the cranelift backend's
    /// `copy_list_string_block`.
    ///
    /// The returned value is `header_off + out_ptr + (dst_block -
    /// src_block_start)`, i.e. it already includes `out_ptr` (function
    /// parameter 2). It is the "arena-absolute" form described in the
    /// inline comments below, not a buffer-relative offset.
    ///
    /// Emitted IR, in order:
    /// 1. load `len` from `header_off` and compute the end of the offset
    ///    array;
    /// 2. pick the start of the source block (`off_0` when `len != 0`,
    ///    otherwise `header_off`) through a branch + phi;
    /// 3. round the tail cursor up to a multiple of 4, reserve
    ///    `block_size` bytes and store the bumped cursor back;
    /// 4. `memcpy` the source block to `out_ptr + dst_block`;
    /// 5. loop over the `len` copied offset entries, adding `delta_abs`
    ///    to each.
    ///
    /// On success the builder is left positioned at the end of the
    /// `ls_reloc_done` block and `self.needs_tail_cursor` is set.
    ///
    /// # Errors
    ///
    /// Returns `LlvmError::Codegen` when `arena_base_ptr` or `state_ptr`
    /// is unset, when the builder has no insert block, or when any
    /// builder call fails. Errors from `lookup_param` and the
    /// `arena_addr_i32_checked*` helpers are propagated unchanged.
    pub(crate) fn copy_list_string_block(
        &mut self,
        header_off: inkwell::values::IntValue<'ctx>,
    ) -> Result<inkwell::values::IntValue<'ctx>, LlvmError> {
        // The arena base pointer itself is not used in this function; the
        // lookup only serves as a guard that fails early when it is unset.
        let _arena_base_ptr = self.arena_base_ptr.ok_or_else(|| {
            LlvmError::Codegen("StoreField (ListString) outside buffer entry".into())
        })?;
        let state_ptr = self.state_ptr.ok_or_else(|| {
            LlvmError::Codegen("StoreField (ListString): missing state ptr".into())
        })?;
        let i32_t = self.ctx.i32_type();
        let i8_t = self.ctx.i8_type();
        let i64_t = self.ctx.i64_type();

        // len = [header_off].
        let header_abs =
            self.arena_addr_i32_checked_const(header_off, 4, "ListString header len")?;
        let len = self
            .builder
            .build_load(i32_t, header_abs, "ls_len")
            .map_err(|e| LlvmError::Codegen(format!("ListString len load: {e}")))?
            .into_int_value();

        // offsets_end = header_off + 4 + len*4.
        // (`len*4` is emitted as `len << 2`; all of this is plain i32
        // arithmetic in the generated code.)
        let four = i32_t.const_int(4, false);
        let offs_bytes = self
            .builder
            .build_left_shift(len, i32_t.const_int(2, false), "ls_offs_bytes")
            .map_err(|e| LlvmError::Codegen(format!("ListString offs<<2: {e}")))?;
        let header_payload = self
            .builder
            .build_int_add(header_off, four, "ls_header_payload")
            .map_err(|e| LlvmError::Codegen(format!("ListString header+4: {e}")))?;
        let offsets_end = self
            .builder
            .build_int_add(header_payload, offs_bytes, "ls_offsets_end")
            .map_err(|e| LlvmError::Codegen(format!("ListString offsets_end: {e}")))?;

        // src_block_start = (len != 0) ? off_0 : header_off, where
        // off_0 = [header_off + 4] (the first / lowest String record
        // offset). For an empty list the block is just the 4-byte header;
        // do not speculatively load off_0 in that case, because the slot
        // is outside the empty header.
        let len_nz = self
            .builder
            .build_int_compare(
                inkwell::IntPredicate::NE,
                len,
                i32_t.const_zero(),
                "ls_len_nz",
            )
            .map_err(|e| LlvmError::Codegen(format!("ListString len!=0: {e}")))?;
        let off0_load_bb = self.ctx.append_basic_block(self.func, "ls_off0_load");
        let off0_done_bb = self.ctx.append_basic_block(self.func, "ls_off0_done");
        // Remember the block that ends with the conditional branch: it is
        // the phi predecessor for the empty-list (`header_off`) case.
        let off0_pre_bb = self
            .builder
            .get_insert_block()
            .ok_or_else(|| LlvmError::Codegen("ListString off0: no insert block".into()))?;
        self.builder
            .build_conditional_branch(len_nz, off0_load_bb, off0_done_bb)
            .map_err(|e| LlvmError::Codegen(format!("ListString off0 branch: {e}")))?;

        self.builder.position_at_end(off0_load_bb);
        let off0_abs = self.arena_addr_i32_checked_const(header_payload, 4, "ListString off0")?;
        let off0 = self
            .builder
            .build_load(i32_t, off0_abs, "ls_off0")
            .map_err(|e| LlvmError::Codegen(format!("ListString off0 load: {e}")))?
            .into_int_value();
        // Re-query the insert block instead of reusing `off0_load_bb`.
        // NOTE(review): presumably the checked-address helper can move the
        // builder into a new block; confirm against its implementation.
        let off0_load_end = self
            .builder
            .get_insert_block()
            .ok_or_else(|| LlvmError::Codegen("ListString off0: no load block".into()))?;
        self.builder
            .build_unconditional_branch(off0_done_bb)
            .map_err(|e| LlvmError::Codegen(format!("ListString off0 done br: {e}")))?;

        self.builder.position_at_end(off0_done_bb);
        let src_block_phi = self
            .builder
            .build_phi(i32_t, "ls_block_start")
            .map_err(|e| LlvmError::Codegen(format!("ListString block_start phi: {e}")))?;
        src_block_phi.add_incoming(&[(&header_off, off0_pre_bb), (&off0, off0_load_end)]);
        let src_block_start = src_block_phi.as_basic_value().into_int_value();
        // block_size = offsets_end - src_block_start: everything from the
        // chosen block start up to the end of the offset array.
        let block_size = self
            .builder
            .build_int_sub(offsets_end, src_block_start, "ls_block_size")
            .map_err(|e| LlvmError::Codegen(format!("ListString block_size: {e}")))?;

        // Tail-bump: align cur up to 4, reserve block_size.
        // SAFETY (assumption, TODO confirm): `state_ptr` must point to a
        // state block that is valid at byte offset
        // `ARENA_STATE_OFFSET_TAIL_CURSOR`; the in-bounds GEP and the i32
        // load/store below rely on that.
        let tail_gep = unsafe {
            self.builder
                .build_in_bounds_gep(
                    i8_t,
                    state_ptr,
                    &[i32_t.const_int(u64::from(ARENA_STATE_OFFSET_TAIL_CURSOR), false)],
                    "ls_tail_gep",
                )
                .map_err(|e| LlvmError::Codegen(format!("ListString tail GEP: {e}")))?
        };
        let cur = self
            .builder
            .build_load(i32_t, tail_gep, "ls_tail_pre")
            .map_err(|e| LlvmError::Codegen(format!("ListString tail load: {e}")))?
            .into_int_value();
        // dst_block = (cur + 3) & !3, i.e. cur rounded up to a multiple of 4.
        let sum = self
            .builder
            .build_int_add(cur, i32_t.const_int(3, false), "ls_tail_align_sum")
            .map_err(|e| LlvmError::Codegen(format!("ListString tail align add: {e}")))?;
        let dst_block = self
            .builder
            .build_and(
                sum,
                i32_t.const_int(u64::from(!3u32), false),
                "ls_dst_block",
            )
            .map_err(|e| LlvmError::Codegen(format!("ListString tail align and: {e}")))?;
        let new_cur = self
            .builder
            .build_int_add(dst_block, block_size, "ls_tail_post")
            .map_err(|e| LlvmError::Codegen(format!("ListString tail bump: {e}")))?;
        // The bumped cursor is stored before the destination/source
        // addresses are resolved through the checked helpers below.
        self.builder
            .build_store(tail_gep, new_cur)
            .map_err(|e| LlvmError::Codegen(format!("ListString tail store: {e}")))?;

        // memcpy(out_ptr + dst_block, arena + src_block_start, block_size).
        let out_ptr = self.lookup_param(2)?;
        let dst_off = self
            .builder
            .build_int_add(out_ptr, dst_block, "ls_dst_off")
            .map_err(|e| LlvmError::Codegen(format!("ListString dst off: {e}")))?;
        let dst_ptr = self.arena_addr_i32_checked(dst_off, block_size, "ListString dst block")?;
        let src_ptr =
            self.arena_addr_i32_checked(src_block_start, block_size, "ListString src block")?;
        let block64 = self
            .builder
            .build_int_z_extend(block_size, i64_t, "ls_block64")
            .map_err(|e| LlvmError::Codegen(format!("ListString block64 zext: {e}")))?;
        // Destination is declared 4-byte aligned, source 1-byte aligned.
        // NOTE(review): `dst_block` is a multiple of 4, but the 4-byte
        // claim also needs `out_ptr` and the arena base to be 4-aligned;
        // confirm with the buffer-entry ABI.
        self.builder
            .build_memcpy(dst_ptr, 4, src_ptr, 1, block64)
            .map_err(|e| LlvmError::Codegen(format!("ListString block memcpy: {e}")))?;

        // Two deltas. `delta` (= dst_block - src_block_start) maps a source
        // arena offset to its out_buf-relative position in the copied block
        // (used to address the entries we rewrite). `delta_abs` (= out_ptr +
        // delta) maps it all the way to an **arena-absolute** offset — F1
        // stores every in-buffer pointer (the returned header slot value and
        // each inner pointer-array entry) arena-absolute, so both use
        // `delta_abs`.
        let delta = self
            .builder
            .build_int_sub(dst_block, src_block_start, "ls_delta")
            .map_err(|e| LlvmError::Codegen(format!("ListString delta: {e}")))?;
        let delta_abs = self
            .builder
            .build_int_add(out_ptr, delta, "ls_delta_abs")
            .map_err(|e| LlvmError::Codegen(format!("ListString delta_abs: {e}")))?;

        // new_header_bufrel = header_off + delta (used only to address the
        // entry array we rewrite). The returned slot value adds out_ptr.
        let new_header_bufrel = self
            .builder
            .build_int_add(header_off, delta, "ls_new_header_bufrel")
            .map_err(|e| LlvmError::Codegen(format!("ListString new_header bufrel: {e}")))?;
        let new_header_abs = self
            .builder
            .build_int_add(header_off, delta_abs, "ls_new_header_abs")
            .map_err(|e| LlvmError::Codegen(format!("ListString new_header abs: {e}")))?;

        // Relocation loop: for i in 0..len, the copied header's offset
        // entry at (out_ptr + new_header_bufrel + 4 + i*4) is rewritten to
        // (off_i + delta_abs). `new_header_bufrel + 4` is the copied offset
        // array's buffer-relative start.
        let entries_base = self
            .builder
            .build_int_add(
                self.builder
                    .build_int_add(out_ptr, new_header_bufrel, "ls_entries_hdr")
                    .map_err(|e| LlvmError::Codegen(format!("ListString entries hdr: {e}")))?,
                four,
                "ls_entries_base",
            )
            .map_err(|e| LlvmError::Codegen(format!("ListString entries base: {e}")))?;

        let loop_hdr = self.ctx.append_basic_block(self.func, "ls_reloc_hdr");
        let loop_body = self.ctx.append_basic_block(self.func, "ls_reloc_body");
        let loop_done = self.ctx.append_basic_block(self.func, "ls_reloc_done");
        // Block that falls into the loop header; predecessor for the
        // `i = 0` phi input.
        let pre_bb = self
            .builder
            .get_insert_block()
            .ok_or_else(|| LlvmError::Codegen("ListString reloc: no insert block".into()))?;
        self.builder
            .build_unconditional_branch(loop_hdr)
            .map_err(|e| LlvmError::Codegen(format!("ListString reloc entry br: {e}")))?;

        // Header: i = phi [0, pre], [i_next, body].
        self.builder.position_at_end(loop_hdr);
        let i_phi = self
            .builder
            .build_phi(i32_t, "ls_i")
            .map_err(|e| LlvmError::Codegen(format!("ListString i phi: {e}")))?;
        i_phi.add_incoming(&[(&i32_t.const_zero(), pre_bb)]);
        let i_val = i_phi.as_basic_value().into_int_value();
        // Unsigned compare: `len` is treated as an unsigned 32-bit count.
        let cond = self
            .builder
            .build_int_compare(inkwell::IntPredicate::ULT, i_val, len, "ls_i_lt")
            .map_err(|e| LlvmError::Codegen(format!("ListString i<len: {e}")))?;
        self.builder
            .build_conditional_branch(cond, loop_body, loop_done)
            .map_err(|e| LlvmError::Codegen(format!("ListString reloc cond br: {e}")))?;

        // Body: entry_off = entries_base + i*4; *(entry) += delta_abs.
        self.builder.position_at_end(loop_body);
        let i_bytes = self
            .builder
            .build_left_shift(i_val, i32_t.const_int(2, false), "ls_i_bytes")
            .map_err(|e| LlvmError::Codegen(format!("ListString i<<2: {e}")))?;
        let entry_off = self
            .builder
            .build_int_add(entries_base, i_bytes, "ls_entry_off")
            .map_err(|e| LlvmError::Codegen(format!("ListString entry off: {e}")))?;
        let entry_addr =
            self.arena_addr_i32_checked_const(entry_off, 4, "ListString reloc entry")?;
        let old = self
            .builder
            .build_load(i32_t, entry_addr, "ls_entry_old")
            .map_err(|e| LlvmError::Codegen(format!("ListString entry load: {e}")))?
            .into_int_value();
        let new = self
            .builder
            .build_int_add(old, delta_abs, "ls_entry_new")
            .map_err(|e| LlvmError::Codegen(format!("ListString entry reloc: {e}")))?;
        self.builder
            .build_store(entry_addr, new)
            .map_err(|e| LlvmError::Codegen(format!("ListString entry store: {e}")))?;
        let i_next = self
            .builder
            .build_int_add(i_val, i32_t.const_int(1, false), "ls_i_next")
            .map_err(|e| LlvmError::Codegen(format!("ListString i++: {e}")))?;
        // As above, take the back-edge predecessor from the current insert
        // block rather than assuming it is still `loop_body`.
        let body_end = self
            .builder
            .get_insert_block()
            .ok_or_else(|| LlvmError::Codegen("ListString reloc: no body block".into()))?;
        i_phi.add_incoming(&[(&i_next, body_end)]);
        self.builder
            .build_unconditional_branch(loop_hdr)
            .map_err(|e| LlvmError::Codegen(format!("ListString reloc back-edge: {e}")))?;

        // Continue emitting after the loop; record that this function's
        // generated code touched the tail cursor.
        self.builder.position_at_end(loop_done);
        self.needs_tail_cursor = true;
        Ok(new_header_abs)
    }
}