qala-compiler 0.1.1

Compiler and bytecode VM for the Qala programming language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
//! integer-expression codegen for the ARM64 backend.
//!
//! [`Arm64Backend::compile_expr`] lowers one [`TypedExpr`] to AArch64
//! instructions. it is split into this file as a second `impl Arm64Backend`
//! block -- Rust allows an `impl` to span several files of the same module --
//! while the struct itself and the program/function walk live in `mod.rs`.
//!
//! ## the result convention
//!
//! every `compile_expr` call leaves its result in `x0`. a binary operator
//! evaluates the LHS into `x0`, spills `x0` to a scratch stack slot, evaluates
//! the RHS into `x0`, reloads the LHS into `x9`, and applies the operator
//! writing `x0`. scratch registers `x9`-`x15` hold only momentary values
//! within one operation -- never across a statement boundary, never across a
//! `bl`. nested binary operators each claim a distinct scratch slot from the
//! [`FrameLayout`](super::frame::FrameLayout) and release it after, so the
//! spill bookkeeping is a balanced stack.
//!
//! the integer core handles `i64` and `bool`: integer literals, boolean
//! literals, identifier loads, parenthesised expressions, the unary `!` and
//! `-`, the binary `+ - * / %`, the six comparisons, the short-circuit
//! `&&` / `||`, and a call to a user function. every other expression
//! construct -- a float, a string, a struct literal, a method call, a range,
//! a call to a stdlib function -- is unsupported and returns a [`QalaError`],
//! never a panic.
//!
//! ## a call expression
//!
//! a [`TypedExpr::Call`] to a user function lowers to AAPCS64 argument passing
//! and a `bl`. because every `compile_expr` lands its result in `x0`,
//! evaluating argument 1 into `x0` would clobber argument 0 -- so each
//! argument is evaluated, then a fresh scratch slot is CLAIMED (the same
//! [`claim_scratch`](super::frame::FrameLayout::claim_scratch) stack the
//! nested-binary spill discipline uses) and the result `str`-ed into it.
//! after all arguments are evaluated a run of `ldr x0`, `ldr x1`, ... loads
//! the slots into the AAPCS64 argument registers; then `bl <name>`, the
//! result in `x0`; then every claimed slot is released.
//!
//! claiming -- rather than using one fixed shared run -- is what makes a
//! NESTED call correct. an argument that is itself a call runs to completion
//! while the outer call's already-evaluated arguments sit in claimed slots;
//! the nested call claims its own argument slots STRICTLY BEYOND those, so it
//! cannot overwrite the outer call's saved arguments. the argument is
//! evaluated BEFORE its slot is claimed, so the argument's own transient
//! scratch claims (a binary operator inside it) release before the
//! persistent argument slot is taken.

use crate::ast::{BinOp, UnaryOp};
use crate::errors::QalaError;
use crate::span::Span;
use crate::typed_ast::TypedExpr;

use super::Arm64Backend;

/// the maximum number of arguments the integer core passes to one call.
///
/// this is the AAPCS64 integer-argument register count: arguments 0-7 go in
/// `x0`-`x7`, and a ninth argument would have to be passed on the stack. a
/// stack-passed argument is a Phase 13+ extension, so `compile_call` rejects
/// any call carrying more arguments than this.
///
/// `pub(super)` so `print.rs` derives its `MAX_PRINTF_HOLES` from this one
/// source -- the printf hole limit is this count minus the `x0` the format
/// pointer takes.
pub(super) const MAX_CALL_ARGS: usize = 8;

/// the inclusive upper bound for emitting an `i64` literal as a bare `mov`.
///
/// AArch64 `mov` takes a 16-bit immediate; a value in `0..=65535` always
/// encodes. anything outside that range -- a larger constant or a negative
/// value -- is emitted with the `ldr xN, =<value>` literal-pool form, which
/// the assembler resolves for any 64-bit value.
///
/// `emit_int_literal` is the one place that consults this bound.
const MOV_IMM_MAX: i64 = 65535;

impl Arm64Backend {
    /// compile a single typed expression, emitting AArch64 instructions that
    /// leave its value in `x0`.
    ///
    /// handled here: integer and boolean literals, identifier loads,
    /// parenthesised expressions, unary and binary operators, blocks, and
    /// calls (via `compile_call`). a range used as a value, and every
    /// remaining construct (a float, a string, a struct literal, ...), is
    /// rejected.
    ///
    /// # Errors
    ///
    /// returns a [`QalaError::Type`] carrying the offending node's span when
    /// the expression is unsupported or when an identifier has no stack slot;
    /// an error from a sub-expression is propagated unchanged.
    pub(super) fn compile_expr(&mut self, expr: &TypedExpr) -> Result<(), QalaError> {
        match expr {
            // the leaf cases emit directly and fall through to the shared
            // `Ok(())` below; every delegating or rejecting case returns.
            TypedExpr::Int { value, .. } => self.emit_int_literal(*value),
            TypedExpr::Bool { value, .. } => {
                // true is 1 and false is 0 in x0.
                let insn = if *value {
                    "mov     x0, 1"
                } else {
                    "mov     x0, 0"
                };
                self.asm.emit_insn(insn);
            }
            TypedExpr::Ident { name, span, .. } => {
                // look the name up and load its slot. a missing slot is
                // reported as an error rather than a panic.
                let slot = match self.resolve_name(name) {
                    Some(slot) => slot,
                    None => {
                        return Err(QalaError::Type {
                            span: *span,
                            message: format!("arm64 backend: name `{name}` has no stack slot"),
                        });
                    }
                };
                let load = format!("ldr     x0, [fp, {slot}]");
                self.asm.emit_insn_commented(&load, name);
            }
            // parentheses add nothing: compile what they wrap.
            TypedExpr::Paren { inner, .. } => return self.compile_expr(inner),
            TypedExpr::Unary { op, operand, .. } => return self.compile_unary(op, operand),
            TypedExpr::Binary { op, lhs, rhs, .. } => return self.compile_binary(op, lhs, rhs),
            // a block expression defers to the block compiler.
            TypedExpr::Block { block, .. } => return self.compile_block(block),
            // a call: argument passing and the branch live in `compile_call`.
            TypedExpr::Call {
                callee, args, span, ..
            } => return self.compile_call(callee, args, *span),
            // a range reaching this point is being used as a value, which the
            // integer core does not support.
            TypedExpr::Range { span, .. } => {
                return Err(QalaError::Type {
                    span: *span,
                    message: "the arm64 backend does not yet support ranges".to_string(),
                });
            }
            // anything else lies outside the integer core; name it in the
            // diagnostic.
            _ => {
                return Err(QalaError::Type {
                    span: expr.span(),
                    message: format!(
                        "the arm64 backend does not yet support {}",
                        unsupported_expr_name(expr)
                    ),
                });
            }
        }
        Ok(())
    }

    /// load an `i64` literal into `x0`.
    ///
    /// a value in `0..=MOV_IMM_MAX` is emitted as `mov x0, <value>`. every
    /// other value -- a larger constant or a negative one -- is emitted as
    /// `ldr x0, =<value>`, the literal-pool form.
    ///
    /// no range check is needed beyond that split: any `i64` is accepted and
    /// one of the two forms is always emitted.
    fn emit_int_literal(&mut self, value: i64) {
        let insn = match value {
            // small non-negative values take the direct-immediate form.
            0..=MOV_IMM_MAX => format!("mov     x0, {value}"),
            // everything else goes through the literal pool.
            _ => format!("ldr     x0, ={value}"),
        };
        self.asm.emit_insn(&insn);
    }

    /// compile a unary operator applied to `operand`, leaving the result in
    /// `x0`.
    ///
    /// the operand is compiled first (its value lands in `x0`), then a single
    /// instruction rewrites `x0` in place: `!` flips a 0/1 boolean, `-`
    /// negates an integer.
    ///
    /// # Errors
    ///
    /// propagates any error from compiling the operand.
    fn compile_unary(&mut self, op: &UnaryOp, operand: &TypedExpr) -> Result<(), QalaError> {
        self.compile_expr(operand)?;
        let insn = match op {
            // xor with 1 toggles bit 0 of a 0/1 boolean.
            UnaryOp::Not => "eor     x0, x0, 1",
            // two's-complement negation of an i64.
            UnaryOp::Neg => "neg     x0, x0",
        };
        self.asm.emit_insn(insn);
        Ok(())
    }

    /// compile a binary operator, leaving the result in `x0`.
    ///
    /// `&&` and `||` are routed to the short-circuit lowering; every other
    /// operator (arithmetic and comparison) is routed to the scratch-slot
    /// spill lowering.
    ///
    /// # Errors
    ///
    /// propagates any error from the lowering it dispatches to.
    fn compile_binary(
        &mut self,
        op: &BinOp,
        lhs: &TypedExpr,
        rhs: &TypedExpr,
    ) -> Result<(), QalaError> {
        // classify first: is this one of the two short-circuit operators?
        let short_circuit = match op {
            BinOp::And => Some(ShortCircuit::And),
            BinOp::Or => Some(ShortCircuit::Or),
            _ => None,
        };
        match short_circuit {
            Some(kind) => self.compile_short_circuit(lhs, rhs, kind),
            // both operands are evaluated, then the operator is applied.
            None => self.compile_spilled_binary(op, lhs, rhs),
        }
    }

    /// compile an arithmetic or comparison operator whose operands are both
    /// evaluated, leaving the result in `x0`.
    ///
    /// sequence: claim a scratch slot; compile the LHS into `x0` and `str` it
    /// to the slot; compile the RHS into `x0`; `ldr` the LHS back into `x9`;
    /// release the slot; emit the operator with `x9` = LHS and `x0` = RHS.
    /// the slot stays claimed while either operand is being compiled, so a
    /// nested operator inside an operand claims a different slot.
    ///
    /// # Errors
    ///
    /// propagates any error from compiling either operand.
    fn compile_spilled_binary(
        &mut self,
        op: &BinOp,
        lhs: &TypedExpr,
        rhs: &TypedExpr,
    ) -> Result<(), QalaError> {
        let slot = self.frame_mut().claim_scratch();
        // both memory accesses address the same slot; build their text once.
        let spill = format!("str     x0, [fp, {slot}]");
        let reload = format!("ldr     x9, [fp, {slot}]");

        // left operand: evaluate, then park it in the slot.
        self.compile_expr(lhs)?;
        self.asm.emit_insn_commented(&spill, "spill lhs");

        // right operand: evaluate into x0, then bring the left one back in x9.
        self.compile_expr(rhs)?;
        self.asm.emit_insn_commented(&reload, "reload lhs");
        self.frame_mut().release_scratch();

        // x9 = lhs, x0 = rhs; the operator writes x0.
        self.emit_binop(op);
        Ok(())
    }

    /// emit the instruction sequence for an arithmetic or comparison operator.
    ///
    /// on entry the LHS is in `x9` and the RHS in `x0`; the result is written
    /// to `x0`. each operator maps to a fixed list of one or two instructions,
    /// emitted in order.
    ///
    /// `&&` and `||` map to an empty list: they are lowered by the
    /// short-circuit path, and emitting nothing here avoids a panic should
    /// one ever be routed to this function.
    fn emit_binop(&mut self, op: &BinOp) {
        let insns: &[&str] = match op {
            BinOp::Add => &["add     x0, x9, x0"],
            BinOp::Sub => &["sub     x0, x9, x0"],
            BinOp::Mul => &["mul     x0, x9, x0"],
            // signed division, since i64 is signed.
            BinOp::Div => &["sdiv    x0, x9, x0"],
            // remainder is lhs - (lhs / rhs) * rhs. the quotient is kept in
            // x10 so x9 still holds the lhs when msub subtracts from it.
            BinOp::Rem => &["sdiv    x10, x9, x0", "msub    x0, x10, x0, x9"],
            // comparisons: compare, then set x0 to 0/1 from the condition.
            // the ordering conditions are the signed ones.
            BinOp::Eq => &["cmp     x9, x0", "cset    x0, eq"],
            BinOp::Ne => &["cmp     x9, x0", "cset    x0, ne"],
            BinOp::Lt => &["cmp     x9, x0", "cset    x0, lt"],
            BinOp::Le => &["cmp     x9, x0", "cset    x0, le"],
            BinOp::Gt => &["cmp     x9, x0", "cset    x0, gt"],
            BinOp::Ge => &["cmp     x9, x0", "cset    x0, ge"],
            // short-circuit operators are lowered elsewhere.
            BinOp::And | BinOp::Or => &[],
        };
        for &insn in insns {
            self.asm.emit_insn(insn);
        }
    }

    /// compile a short-circuit `&&` or `||`, leaving a 0/1 result in `x0`.
    ///
    /// emitted shape:
    ///
    /// 1. compile the LHS; branch to the settle label if it decides the result
    /// 2. compile the RHS; branch to the settle label on the same condition
    /// 3. load the fallthrough value and jump to the done label
    /// 4. settle label: load the settle value
    /// 5. done label
    ///
    /// the RHS instructions sit after the first branch, so they are skipped
    /// whenever the LHS alone decides the result.
    ///
    /// # Errors
    ///
    /// propagates any error from compiling either operand.
    fn compile_short_circuit(
        &mut self,
        lhs: &TypedExpr,
        rhs: &TypedExpr,
        kind: ShortCircuit,
    ) -> Result<(), QalaError> {
        // allocate the two labels, settle first and done second.
        let settle_label = self.labels.fresh(kind.settle_prefix());
        let done_label = self.labels.fresh(kind.done_prefix());
        // the same conditional branch follows each operand, so build it once.
        let branch_to_settle = format!("{}    x0, {settle_label}", kind.branch_insn());

        self.compile_expr(lhs)?;
        self.asm.emit_insn(&branch_to_settle);
        self.compile_expr(rhs)?;
        self.asm.emit_insn(&branch_to_settle);

        // neither operand branched away: produce the fallthrough value.
        let load_fallthrough = format!("mov     x0, {}", kind.fallthrough_value());
        self.asm.emit_insn(&load_fallthrough);
        self.asm.emit_insn(&format!("b       {done_label}"));

        // an operand branched here: produce the settle value.
        self.asm.emit_label(&settle_label);
        let load_settle = format!("mov     x0, {}", kind.settle_value());
        self.asm.emit_insn(&load_settle);
        self.asm.emit_label(&done_label);
        Ok(())
    }

    /// compile a call expression, leaving the (possibly unused) result in
    /// `x0`.
    ///
    /// routing, in order:
    ///
    /// - the callee must be a plain identifier; anything else is rejected.
    /// - a name not in `fn_names` that is `print` or `println` goes to the
    ///   print lowering (`compile_print_call`).
    /// - any other name not in `fn_names` is rejected.
    /// - a name in `fn_names` -- including a user function that happens to be
    ///   called `print` or `println` -- is compiled as an ordinary call.
    ///
    /// an ordinary call compiles each argument into `x0` and stores it in a
    /// newly claimed scratch slot, then loads the slots into `x0`, `x1`, ...
    /// in argument order, emits `bl <name>`, and releases one slot per
    /// argument. the argument is compiled BEFORE its slot is claimed, so any
    /// scratch slots the argument itself used have been released by then.
    ///
    /// # Errors
    ///
    /// returns a [`QalaError::Type`] at `span` for a computed callee, for an
    /// unsupported non-user function, or for more than [`MAX_CALL_ARGS`]
    /// arguments; propagates any error from compiling an argument or from the
    /// print lowering.
    fn compile_call(
        &mut self,
        callee: &TypedExpr,
        args: &[TypedExpr],
        span: Span,
    ) -> Result<(), QalaError> {
        // only a direct, named callee is supported.
        let name = if let TypedExpr::Ident { name, .. } = callee {
            name
        } else {
            return Err(QalaError::Type {
                span,
                message: "the arm64 backend does not yet support computed callees".to_string(),
            });
        };

        // a name that is not a user function is either an output built-in
        // (routed to the print lowering) or something unsupported.
        let is_user_fn = self.fn_names.contains(name);
        if !is_user_fn {
            return if name == "print" || name == "println" {
                self.compile_print_call(name, args, span)
            } else {
                Err(QalaError::Type {
                    span,
                    message: format!(
                        "the arm64 backend does not yet support the `{name}` function"
                    ),
                })
            };
        }

        // arguments beyond the register count would need the stack, which is
        // not implemented.
        if args.len() > MAX_CALL_ARGS {
            return Err(QalaError::Type {
                span,
                message: "the arm64 backend supports at most 8 arguments".to_string(),
            });
        }

        // phase 1: compile each argument and park it in its own slot. no
        // argument register is loaded yet, because compiling the next
        // argument overwrites x0.
        let arg_slots = args
            .iter()
            .enumerate()
            .map(|(index, arg)| {
                self.compile_expr(arg)?;
                let slot = self.frame_mut().claim_scratch();
                let store = format!("str     x0, [fp, {slot}]");
                self.asm
                    .emit_insn_commented(&store, &format!("arg {index}"));
                Ok::<_, QalaError>(slot)
            })
            .collect::<Result<Vec<_>, QalaError>>()?;

        // phase 2: move the parked values into x0, x1, ... in order.
        for (reg, slot) in arg_slots.iter().enumerate() {
            self.asm.emit_insn(&format!("ldr     x{reg}, [fp, {slot}]"));
        }

        // phase 3: the call itself; the callee's result arrives in x0.
        self.asm.emit_insn(&format!("bl      {name}"));

        // phase 4: give back one slot per argument so the claims balance.
        for _ in 0..arg_slots.len() {
            self.frame_mut().release_scratch();
        }
        Ok(())
    }
}

/// selects between the two short-circuit operators, `&&` and `||`.
///
/// both operators are lowered by one shared routine; the methods on this enum
/// supply the pieces that differ -- the branch instruction, the two label
/// prefixes, and the two result values.
#[derive(Clone, Copy)]
enum ShortCircuit {
    /// `&&`: a false operand decides the result (false).
    And,
    /// `||`: a true operand decides the result (true).
    Or,
}

impl ShortCircuit {
    /// the conditional branch taken when an operand decides the result:
    /// `cbz` (branch if zero) for `&&`, `cbnz` (branch if non-zero) for `||`.
    ///
    /// both strings are four characters wide -- `cbz` carries a trailing
    /// space -- so the operands line up in the emitted text.
    fn branch_insn(self) -> &'static str {
        match self {
            Self::And => "cbz ",
            Self::Or => "cbnz",
        }
    }

    /// the prefix of the label an operand branches to when it decides the
    /// result.
    fn settle_prefix(self) -> &'static str {
        match self {
            Self::And => "and_false",
            Self::Or => "or_true",
        }
    }

    /// the prefix of the join label at the end of the sequence.
    fn done_prefix(self) -> &'static str {
        match self {
            Self::And => "and_done",
            Self::Or => "or_done",
        }
    }

    /// the result when an operand decides it early: `0` (false) for `&&`,
    /// `1` (true) for `||`.
    fn settle_value(self) -> u8 {
        u8::from(matches!(self, Self::Or))
    }

    /// the result when no operand decides it early: `1` for `&&` (both
    /// operands true), `0` for `||` (both operands false).
    fn fallthrough_value(self) -> u8 {
        u8::from(matches!(self, Self::And))
    }
}

/// map an expression to the human-readable name used in the "does not yet
/// support ..." diagnostic.
///
/// the match lists every [`TypedExpr`] variant with no `_` catch-all, so
/// adding a variant to the typed AST fails this build until the variant is
/// named here.
///
/// the variants `compile_expr` handles in its own arms (`Int`, `Bool`,
/// `Ident`, `Paren`, `Unary`, `Binary`, `Block`, `Call`, `Range`) are listed
/// only to keep the match exhaustive. the text they map to describes a
/// backend bug, because reaching it means a handled variant was sent down the
/// rejection path.
fn unsupported_expr_name(expr: &TypedExpr) -> &'static str {
    match expr {
        // variants `compile_expr` handles itself: seeing one here is a
        // routing bug, and the message says so.
        TypedExpr::Int { .. }
        | TypedExpr::Bool { .. }
        | TypedExpr::Ident { .. }
        | TypedExpr::Paren { .. }
        | TypedExpr::Unary { .. }
        | TypedExpr::Binary { .. }
        | TypedExpr::Block { .. }
        | TypedExpr::Call { .. }
        | TypedExpr::Range { .. } => concat!(
            "this construct (arm64 backend bug: a ",
            "supported expression reached the unsupported-construct path)"
        ),

        // scalar and text values outside the integer core.
        TypedExpr::Float { .. } => "floats",
        TypedExpr::Byte { .. } => "byte values",
        TypedExpr::Str { .. } => "strings",
        TypedExpr::Interpolation { .. } => "string interpolation",

        // aggregates and access into them.
        TypedExpr::Tuple { .. } => "tuples",
        TypedExpr::ArrayLit { .. } => "arrays",
        TypedExpr::ArrayRepeat { .. } => "arrays",
        TypedExpr::StructLit { .. } => "struct literals",
        TypedExpr::FieldAccess { .. } => "field access",
        TypedExpr::Index { .. } => "indexing",
        TypedExpr::MethodCall { .. } => "method calls",

        // operators and control constructs.
        TypedExpr::Try { .. } => "the `?` operator",
        TypedExpr::OrElse { .. } => "the `or` fallback",
        TypedExpr::Pipeline { .. } => "the pipeline operator",
        TypedExpr::Match { .. } => "match expressions",
        TypedExpr::Comptime { .. } => "comptime blocks",
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;
    use crate::parser::Parser;
    use crate::typechecker::check_program;
    use crate::typed_ast::TypedItem;

    /// run `src` through the lexer, parser and typechecker, then hand back the
    /// trailing-value expression of the function called `fn_name`.
    ///
    /// panics (failing the calling test) if any stage reports an error, if no
    /// such function exists, or if the function body has no trailing value.
    fn trailing_expr(src: &str, fn_name: &str) -> TypedExpr {
        let tokens = Lexer::tokenize(src).expect("lex failed");
        let ast = Parser::parse(&tokens).expect("parse failed");
        let (typed, terrors, _) = check_program(&ast, src);
        assert!(terrors.is_empty(), "typecheck errors: {terrors:?}");

        // first function item whose name matches wins.
        let mut found = None;
        for item in typed.iter() {
            if let TypedItem::Fn(d) = item {
                if d.name == fn_name {
                    found = Some(d.clone());
                    break;
                }
            }
        }
        let decl = match found {
            Some(d) => d,
            None => panic!("function `{fn_name}` not found"),
        };

        let value = decl
            .body
            .value
            .expect("the test function has no trailing value");
        *value
    }

    /// lower only the trailing expression of `fn_name` in `src` and return the
    /// instruction text the backend produced for it.
    ///
    /// `begin_function` is called on the declaration before the expression is
    /// compiled, so that (per the original note) scratch slots and parameter
    /// slots resolve against a frame planned from that function. panics on
    /// any front-end error, a missing function, a missing trailing value, or a
    /// backend error.
    fn emit_expr(src: &str, fn_name: &str) -> String {
        let tokens = Lexer::tokenize(src).expect("lex failed");
        let ast = Parser::parse(&tokens).expect("parse failed");
        let (typed, terrors, _) = check_program(&ast, src);
        assert!(terrors.is_empty(), "typecheck errors: {terrors:?}");

        let wanted = typed.iter().find_map(|item| {
            if let TypedItem::Fn(d) = item {
                if d.name == fn_name {
                    return Some(d.clone());
                }
            }
            None
        });
        let decl = match wanted {
            Some(d) => d,
            None => panic!("function `{fn_name}` not found"),
        };

        let mut backend = Arm64Backend::new(src);
        backend.begin_function(&decl);
        let expr = decl.body.value.clone().expect("no trailing value");
        backend.compile_expr(&expr).expect("compile_expr failed");
        backend.take_text()
    }

    #[test]
    fn an_integer_literal_emits_a_mov() {
        // a small literal is materialised with a single `mov` into x0.
        let emitted = emit_expr("fn f() -> i64 { 42 }", "f");
        let expected = "mov     x0, 42";
        assert!(emitted.contains(expected), "{emitted}");
    }

    #[test]
    fn a_large_integer_literal_uses_the_literal_pool() {
        // 100000 is past the 16-bit `mov` range, so it must be loaded with
        // `ldr x0, =<value>` and must not appear as a `mov` immediate.
        let emitted = emit_expr("fn f() -> i64 { 100000 }", "f");
        let (pool_form, mov_form) = ("ldr     x0, =100000", "mov     x0, 100000");
        assert!(emitted.contains(pool_form), "{emitted}");
        assert!(!emitted.contains(mov_form), "{emitted}");
    }

    #[test]
    fn the_i64_min_magnitude_is_rejected_at_the_lexer_before_the_backend() {
        // `emit_int_literal` documents that its `value` is always a valid
        // `i64`. this test pins the reason: the bare literal
        // `9223372036854775808` -- the magnitude of `i64::MIN`, one more than
        // `i64::MAX` -- is an overflow the LEXER refuses, so no typed AST can
        // carry it and the backend's `ldr =` form never sees an out-of-range
        // value.
        let magnitude = i64::MIN.unsigned_abs(); // 9223372036854775808
        let src = format!("fn f() -> i64 {{ -{magnitude} }}");
        assert!(
            Lexer::tokenize(&src).is_err(),
            "the i64::MIN magnitude must be rejected by the lexer, not lowered"
        );
    }

    #[test]
    fn an_extreme_negative_literal_round_trips_through_the_backend() {
        // i64::MAX is the largest magnitude that is still a valid i64 literal;
        // with a unary minus in front the source denotes `i64::MIN + 1`. the
        // literal is far past the 16-bit `mov` range so it must be loaded with
        // `ldr =`, and the sign must be applied by a separate `neg x0, x0`.
        // `emit_expr` itself panics if the backend returns an error.
        let magnitude = i64::MAX;
        let src = format!("fn f() -> i64 {{ -{magnitude} }}");
        let asm = emit_expr(&src, "f");
        let pool_load = format!("ldr     x0, ={magnitude}");
        assert!(
            asm.contains(&pool_load),
            "the i64::MAX magnitude must use the literal pool: {asm}"
        );
        let negate = "neg     x0, x0";
        assert!(asm.contains(negate), "the unary minus must negate: {asm}");
    }

    #[test]
    fn a_bool_literal_emits_one_or_zero() {
        // `true` lowers to the immediate 1 and `false` to the immediate 0.
        for (src, expected) in [
            ("fn f() -> bool { true }", "mov     x0, 1"),
            ("fn f() -> bool { false }", "mov     x0, 0"),
        ] {
            assert!(emit_expr(src, "f").contains(expected));
        }
    }

    #[test]
    fn an_ident_loads_its_parameter_slot() {
        // with a single parameter, reading it is a load from [fp, 16] -- the
        // slot just above the saved fp/lr pair.
        let emitted = emit_expr("fn f(a: i64) -> i64 { a }", "f");
        let slot_load = "ldr     x0, [fp, 16]";
        assert!(emitted.contains(slot_load), "{emitted}");
    }

    #[test]
    fn paren_is_transparent() {
        // wrapping a literal in parentheses still yields the literal's `mov`.
        let emitted = emit_expr("fn f() -> i64 { (7) }", "f");
        let expected = "mov     x0, 7";
        assert!(emitted.contains(expected), "{emitted}");
    }

    #[test]
    fn addition_emits_the_spill_discipline_and_add() {
        // a binary `+` must store x0 to an fp slot, load that slot back into
        // x9, and combine the two with `add`.
        let asm = emit_expr("fn f() -> i64 { 1 + 2 }", "f");
        for (needle, what) in [
            ("str     x0, [fp, ", "spill"),
            ("ldr     x9, [fp, ", "reload"),
            ("add     x0, x9, x0", "add"),
        ] {
            assert!(asm.contains(needle), "missing {what}: {asm}");
        }
    }

    #[test]
    fn subtraction_and_multiplication_emit_sub_and_mul() {
        // `-` and `*` each lower to their instruction with x9 as the left
        // operand and x0 as the right.
        for (src, insn) in [
            ("fn f() -> i64 { 5 - 3 }", "sub     x0, x9, x0"),
            ("fn f() -> i64 { 5 * 3 }", "mul     x0, x9, x0"),
        ] {
            assert!(emit_expr(src, "f").contains(insn));
        }
    }

    #[test]
    fn division_emits_signed_sdiv() {
        // i64 division must use the signed divide; `udiv` must not appear.
        let emitted = emit_expr("fn f() -> i64 { 9 / 3 }", "f");
        let signed_divide = "sdiv    x0, x9, x0";
        assert!(emitted.contains(signed_divide), "{emitted}");
        let has_unsigned = emitted.contains("udiv");
        assert!(!has_unsigned, "i64 division must be signed: {emitted}");
    }

    #[test]
    fn modulo_emits_the_sdiv_msub_idiom() {
        // `%` is a quotient into x10 followed by `msub`, which subtracts
        // quotient * divisor from the dividend.
        let asm = emit_expr("fn f() -> i64 { 9 % 4 }", "f");
        for (needle, what) in [
            ("sdiv    x10, x9, x0", "quotient"),
            ("msub    x0, x10, x0, x9", "msub"),
        ] {
            assert!(asm.contains(needle), "missing {what}: {asm}");
        }
    }

    #[test]
    fn each_comparison_emits_cmp_and_the_signed_condition() {
        // every comparison operator lowers to `cmp x9, x0` plus a `cset` with
        // the matching signed condition code.
        for (op, code) in [
            ("==", "eq"),
            ("!=", "ne"),
            ("<", "lt"),
            ("<=", "le"),
            (">", "gt"),
            (">=", "ge"),
        ] {
            let src = format!("fn f() -> bool {{ 1 {op} 2 }}");
            let cond = format!("cset    x0, {code}");
            let asm = emit_expr(&src, "f");
            let has_cmp = asm.contains("cmp     x9, x0");
            assert!(has_cmp, "missing cmp for {src}: {asm}");
            assert!(asm.contains(&cond), "missing `{cond}` for {src}: {asm}");
        }
    }

    #[test]
    fn comparisons_use_signed_not_unsigned_conditions() {
        // i64 is signed, so a `cset` with any of the unsigned condition codes
        // would be a miscompile.
        let asm = emit_expr("fn f() -> bool { 1 < 2 }", "f");
        let offender = ["lo", "ls", "hi", "hs"]
            .iter()
            .find(|code| asm.contains(&format!("cset    x0, {code}")));
        assert!(offender.is_none(), "{asm}");
    }

    #[test]
    fn logical_and_emits_the_short_circuit_labels() {
        // `&&` needs its settle label, its join label, and a `cbz` branch.
        let asm = emit_expr("fn f() -> bool { true && false }", "f");
        let has_settle = asm.contains(".Land_false_");
        let has_done = asm.contains(".Land_done_");
        let has_branch = asm.contains("cbz ");
        assert!(has_settle, "missing settle label: {asm}");
        assert!(has_done, "missing done label: {asm}");
        assert!(has_branch, "&& must short-circuit with cbz: {asm}");
    }

    #[test]
    fn logical_or_emits_the_short_circuit_labels() {
        // `||` needs its settle label, its join label, and a `cbnz` branch.
        let asm = emit_expr("fn f() -> bool { true || false }", "f");
        let has_settle = asm.contains(".Lor_true_");
        let has_done = asm.contains(".Lor_done_");
        let has_branch = asm.contains("cbnz");
        assert!(has_settle, "missing settle label: {asm}");
        assert!(has_done, "missing done label: {asm}");
        assert!(has_branch, "|| must short-circuit with cbnz: {asm}");
    }

    #[test]
    fn not_emits_an_eor() {
        // logical not flips the low bit of x0 with an exclusive-or against 1.
        let emitted = emit_expr("fn f() -> bool { !true }", "f");
        let flip = "eor     x0, x0, 1";
        assert!(emitted.contains(flip), "{emitted}");
    }

    #[test]
    fn neg_emits_a_neg() {
        // unary minus negates x0 in place.
        let emitted = emit_expr("fn f() -> i64 { -5 }", "f");
        let negate = "neg     x0, x0";
        assert!(emitted.contains(negate), "{emitted}");
    }

    #[test]
    fn a_nested_expression_claims_distinct_scratch_slots() {
        // `(1 + 2) * (3 + 4)`: the outer `*` spills its LHS while the RHS
        // subtree (itself a `+`) runs and spills again -- the two live spills
        // must land in distinct slots, so two different [fp, N] offsets appear.
        //
        // the slots are compared by their parsed `[fp, N]` offset, not by the
        // text of the whole line: two spills to the SAME offset whose lines
        // differ only in trailing text must not count as two slots.
        let asm = emit_expr("fn f() -> i64 { (1 + 2) * (3 + 4) }", "f");
        let spill_slots: std::collections::BTreeSet<i64> = asm
            .lines()
            .filter(|l| l.contains("str     x0, [fp, ") && l.contains("spill lhs"))
            .filter_map(fp_offset)
            .collect();
        assert!(
            spill_slots.len() >= 2,
            "nested ops must use >= 2 distinct scratch slots: {asm}"
        );
        assert!(asm.contains("mul     x0, x9, x0"), "{asm}");
    }

    #[test]
    fn an_unsupported_construct_returns_an_error_not_a_panic() {
        // a float literal is outside the integer core. `compile_expr` must
        // hand back a `QalaError::Type` whose message mentions floats rather
        // than panicking.
        let src = "fn f() -> f64 { 3.5 }";
        let float_expr = trailing_expr(src, "f");
        let mut backend = Arm64Backend::new(src);
        let err = backend
            .compile_expr(&float_expr)
            .expect_err("a float must be rejected");
        match &err {
            QalaError::Type { message, .. } => {
                let names_float = message.contains("float");
                assert!(names_float, "message: {message}");
            }
            other => panic!("expected QalaError::Type, got {other:?}"),
        }
    }

    /// push an entire program through lexing, parsing, typechecking and
    /// `compile_arm64`, returning the assembly text.
    ///
    /// panics on an error from any stage. because this takes the full
    /// `compile_arm64` path rather than `emit_expr`'s single-expression path,
    /// the `fn_names` set is populated and calls between functions resolve
    /// (per the original note on this helper).
    fn compile_program_ok(src: &str) -> String {
        let tokens = Lexer::tokenize(src).expect("lex failed");
        let ast = Parser::parse(&tokens).expect("parse failed");
        let (typed, terrors, _) = check_program(&ast, src);
        assert!(terrors.is_empty(), "typecheck errors: {terrors:?}");
        match super::super::compile_arm64(&typed, src) {
            Ok(asm) => asm,
            Err(e) => panic!("arm64 errors: {e:?}"),
        }
    }

    /// slice the function named `caller` out of `asm`: every line from the
    /// first one that starts with `caller:` up to, but not including, the
    /// first later line whose trimmed start is `.Lcaller_epilogue`. returns an
    /// empty vector when there is no `caller:` line.
    fn caller_body(asm: &str) -> Vec<String> {
        let mut body = Vec::new();
        let mut inside = false;
        for line in asm.lines() {
            if !inside {
                if !line.starts_with("caller:") {
                    continue;
                }
                inside = true;
            }
            if line.trim_start().starts_with(".Lcaller_epilogue") {
                break;
            }
            body.push(line.to_owned());
        }
        body
    }

    #[test]
    fn a_call_to_a_user_function_emits_argument_spills_loads_and_a_bl() {
        // `add3(1, 2, 3)` must spill three arguments, load them back into
        // x0/x1/x2 (the AAPCS64 argument registers), and then `bl add3`.
        let asm = compile_program_ok(
            "fn add3(a: i64, b: i64, c: i64) -> i64 { a + b + c }\n\
             fn caller() -> i64 { add3(1, 2, 3) }",
        );
        let lines = caller_body(&asm);
        // one `// arg ` spill line per argument.
        let spills = lines.iter().filter(|l| l.contains("// arg ")).count();
        let body = lines.join("\n");
        assert_eq!(spills, 3, "three arguments -> three spills: {body}");
        // each argument register is loaded from an fp-relative slot.
        for reg in ["x0", "x1", "x2"] {
            let load = format!("ldr     {reg}, [fp, ");
            assert!(body.contains(&load), "missing {reg} load: {body}");
        }
        let has_bl = body.contains("bl      add3");
        assert!(has_bl, "missing the bl: {body}");
    }

    #[test]
    fn a_call_loads_arguments_after_every_spill_not_interleaved() {
        // the ordering trap: all three `str` spills must come before the first
        // `ldr` load -- otherwise evaluating arg 1 would clobber arg 0 in x0.
        //
        // "the first load" is the earliest load into ANY of the three argument
        // registers, not just x0: an `ldr x1` or `ldr x2` slipped in between
        // two spills is just as much an interleaving as an early `ldr x0`.
        let asm = compile_program_ok(
            "fn add3(a: i64, b: i64, c: i64) -> i64 { a }\n\
             fn caller() -> i64 { add3(1, 2, 3) }",
        );
        let body = caller_body(&asm);
        let last_spill = body
            .iter()
            .rposition(|l| l.contains("str     x0, [fp, ") && l.contains("// arg "))
            .expect("no argument spill");
        let first_load = body
            .iter()
            .position(|l| {
                ["x0", "x1", "x2"]
                    .iter()
                    .any(|reg| l.contains(&format!("ldr     {reg}, [fp, ")))
            })
            .expect("no argument load");
        assert!(
            last_spill < first_load,
            "every spill must precede the first load: {body:?}"
        );
    }

    #[test]
    fn a_call_with_no_arguments_emits_just_a_bl() {
        // calling a zero-parameter function needs the `bl` and no argument
        // spill at all.
        let asm = compile_program_ok(
            "fn answer() -> i64 { 42 }\n\
             fn caller() -> i64 { answer() }",
        );
        let body = caller_body(&asm).join("\n");
        let has_bl = body.contains("bl      answer");
        let has_spill = body.contains("// arg ");
        assert!(has_bl, "missing the bl: {body}");
        assert!(!has_spill, "a no-arg call spills nothing: {body}");
    }

    #[test]
    fn a_nested_call_emits_two_bls_with_the_inner_call_first() {
        // `outer(inner(5))`: `inner` has to finish -- with its own `bl` --
        // and leave its result in x0 BEFORE the outer call spills that x0 as
        // its argument. required order: `bl inner`, then the outer call's
        // argument spill, then `bl outer`.
        let asm = compile_program_ok(
            "fn inner(n: i64) -> i64 { n + 1 }\n\
             fn outer(n: i64) -> i64 { n * 2 }\n\
             fn caller() -> i64 { outer(inner(5)) }",
        );
        let body = caller_body(&asm);
        let line_of = |needle: &str| body.iter().position(|l| l.contains(needle));
        let inner_bl = line_of("bl      inner").expect("missing bl inner");
        let outer_bl = line_of("bl      outer").expect("missing bl outer");
        assert!(
            inner_bl < outer_bl,
            "the inner call must run first: {body:?}"
        );
        // the inner call's own argument (5) is spilled BEFORE `bl inner`, so
        // only lines after `bl inner` are searched: the first argument spill
        // found there is the outer call capturing the inner result, and it
        // must sit before `bl outer`.
        let after_inner = inner_bl + 1;
        let outer_spill = body
            .iter()
            .skip(after_inner)
            .position(|l| l.contains("str     x0, [fp, ") && l.contains("// arg "))
            .map(|rel| rel + after_inner)
            .expect("missing the outer-call argument spill after bl inner");
        assert!(
            outer_spill < outer_bl,
            "the inner result is spilled between the two bls: {body:?}"
        );
    }

    #[test]
    fn a_call_to_a_stdlib_function_is_rejected_cleanly() {
        // `abs` comes from the stdlib and is not a user function, so it is
        // absent from `fn_names` and the backend must refuse the call with a
        // clean QalaError (the stdlib is Phase 13) -- no panic and no `bl`.
        // the source passes the typechecker (asserted below), so the rejection
        // is the backend's own.
        let src = "fn caller() -> i64 { abs(-1) }";
        let tokens = Lexer::tokenize(src).expect("lex failed");
        let ast = Parser::parse(&tokens).expect("parse failed");
        let (typed, terrors, _) = check_program(&ast, src);
        assert!(terrors.is_empty(), "typecheck errors: {terrors:?}");
        let errors =
            super::super::compile_arm64(&typed, src).expect_err("a stdlib call must be rejected");
        // the first reported error has to name the offending function.
        let message = errors[0].message();
        assert!(message.contains("abs"), "message: {:?}", message);
    }

    #[test]
    fn a_user_function_named_println_routes_as_a_user_call_not_the_printf_path() {
        // WR-01 regression. `print` / `println` are not reserved by the
        // typechecker, so a program can define `fn println(n: i64) -> i64`
        // itself, and the bytecode backend runs that user function. the arm64
        // backend has to agree: a call to the shadowing `println` is a plain
        // `bl println`, NOT the interpolation-to-printf lowering (which would
        // emit `bl printf` and reject the i64 argument as a "string
        // expression"). only an UNSHADOWED `print` / `println` builtin may
        // reach the printf path.
        let asm = compile_program_ok(
            "fn println(n: i64) -> i64 { n }\n\
             fn main() -> i64 { println(5) }",
        );
        let body = fn_body(&asm, "main");
        // `main` contains an ordinary user call to `println`.
        let calls_user_println = body.iter().map(|l| l.trim()).any(|t| t == "bl      println");
        assert!(
            calls_user_println,
            "the shadowed `println` must lower to a `bl println` user call: {body:?}"
        );
        // nothing in the whole program went down the printf path.
        let calls_printf = asm.contains("bl      printf");
        assert!(
            !calls_printf,
            "a shadowed `println` must not route to the printf lowering: {asm}"
        );
        // the argument 5 was spilled the way any user-call argument is.
        let spills_arg0 = body.iter().any(|l| l.contains("// arg 0"));
        assert!(
            spills_arg0,
            "the i64 argument must spill as a user-call argument: {body:?}"
        );
    }

    #[test]
    fn a_call_with_a_non_ident_callee_is_rejected_cleanly() {
        // a computed callee is outside the integer core. the typed AST is
        // assembled by hand so the callee is a `Paren` wrapping an integer
        // literal instead of an `Ident`.
        use crate::types::QalaType;
        let literal = TypedExpr::Int {
            value: 0,
            ty: QalaType::I64,
            span: Span::new(0, 1),
        };
        let callee = TypedExpr::Paren {
            inner: Box::new(literal),
            ty: QalaType::I64,
            span: Span::new(0, 3),
        };
        let call = TypedExpr::Call {
            callee: Box::new(callee),
            args: vec![],
            ty: QalaType::I64,
            span: Span::new(0, 5),
        };
        let mut backend = Arm64Backend::new("");
        let err = backend
            .compile_expr(&call)
            .expect_err("a computed callee must be rejected");
        match &err {
            QalaError::Type { message, .. } => {
                let names_callee = message.contains("computed callee");
                assert!(names_callee, "message: {message}");
            }
            other => panic!("expected QalaError::Type, got {other:?}"),
        }
    }

    /// slice the function called `name` out of `asm`: every line from the
    /// first one that, once trimmed, is exactly `name:` up to, but not
    /// including, the first later line whose trimmed start begins with
    /// `.L<name>_epilogue`. returns an empty vector when the label is absent.
    /// the same idea as [`caller_body`], for any function name.
    fn fn_body(asm: &str, name: &str) -> Vec<String> {
        let entry = format!("{name}:");
        let exit = format!(".L{name}_epilogue");
        let mut collected = Vec::new();
        let mut lines = asm.lines();
        // skip ahead to the entry label; it is the first line kept.
        for line in lines.by_ref() {
            if line.trim() == entry {
                collected.push(line.to_string());
                break;
            }
        }
        // keep everything after the label until the epilogue label shows up.
        for line in lines {
            if line.trim_start().starts_with(&exit) {
                break;
            }
            collected.push(line.to_string());
        }
        collected
    }

    /// the `[fp, N]` byte offset referenced by an instruction line, or `None`
    /// if the line has no `[fp, ...]` operand with a numeric offset.
    ///
    /// the first `[fp,` on the line is used. the text between it and the next
    /// `]` is trimmed and parsed as an `i64`; a leading `#` (the explicit
    /// immediate marker, as in `[fp, #16]`) is accepted and ignored, and the
    /// space after the comma is optional. a register offset such as
    /// `[fp, x1]` does not parse and yields `None`.
    fn fp_offset(line: &str) -> Option<i64> {
        let (_, after_base) = line.split_once("[fp,")?;
        let (operand, _) = after_base.split_once(']')?;
        let operand = operand.trim();
        // tolerate `#imm` so a change in the emitter's immediate spelling
        // cannot silently turn offset checks into no-ops.
        let digits = operand.strip_prefix('#').unwrap_or(operand).trim_start();
        digits.parse().ok()
    }

    #[test]
    fn a_nested_call_in_a_later_argument_does_not_clobber_an_earlier_argument() {
        // CR-01 regression. in `h(100, id(7))`, `h` returns its FIRST
        // parameter, so `f` must return 100. the original bug was one shared
        // argument run: the nested `id(7)` reused argument slot 0 and replaced
        // the outer call's stored 100 with 7, making `f` return 7. the fix
        // gives every argument its own CLAIMED scratch slot, so the nested
        // call cannot alias the outer call's slot 0.
        //
        // how this test proves it: locate the slot the outer `h` call stores
        // its first argument (100) into, then check that slot is NEVER stored
        // to again before `bl h` consumes it. if nothing rewrote it, it still
        // holds 100 at the call.
        let asm = compile_program_ok(
            "fn id(x: i64) -> i64 { x }\n\
             fn h(a: i64, b: i64) -> i64 { a }\n\
             fn f() -> i64 { h(100, id(7)) }",
        );
        let body = fn_body(&asm, "f");
        // where the constant 100 is materialised in x0.
        let mov_at = body
            .iter()
            .position(|l| l.trim() == "mov     x0, 100")
            .expect("missing `mov x0, 100` for the outer call's first argument");
        // the line right after it has to be the argument-0 spill of x0.
        let spill_line = &body[mov_at + 1];
        let is_arg0_spill =
            spill_line.contains("str     x0, [fp, ") && spill_line.contains("// arg 0");
        assert!(
            is_arg0_spill,
            "the 100 must be spilled as argument 0 right after the mov: {body:?}"
        );
        let arg0_slot = fp_offset(spill_line).expect("argument 0 spill has no [fp, N]");
        // the call that finally consumes the arguments.
        let call_at = body
            .iter()
            .position(|l| l.trim() == "bl      h")
            .expect("missing `bl h`");
        // everything strictly between the argument-0 spill and `bl h`.
        let between = &body[mov_at + 2..call_at];
        // no store in that window may target the argument-0 slot -- neither
        // the nested `id(7)` call nor anything else.
        let clobber = between
            .iter()
            .find(|l| l.contains("str ") && fp_offset(l) == Some(arg0_slot));
        assert!(
            clobber.is_none(),
            "argument 0 (100) at [fp, {arg0_slot}] was overwritten before `bl h`: \
             {clobber:?} in {body:?}"
        );
        // guard against a vacuous pass: the inner call really has to run
        // inside that window rather than being folded away.
        let inner_ran = between.iter().map(|l| l.trim()).any(|t| t == "bl      id");
        assert!(
            inner_ran,
            "the nested `id(7)` call must run between the two outer arguments: {body:?}"
        );
    }

    #[test]
    fn a_deep_arithmetic_call_argument_keeps_every_slot_inside_the_frame() {
        // CR-02 regression. `id(((a+1)*(a+2)) - ((a+3)*(a+4)))` is a call
        // whose only argument is a deeply nested arithmetic expression. the
        // original bug: the frame planner's spill-depth pre-walk had no `Call`
        // arm, so a call counted for zero scratch depth even though its
        // argument claims scratch slots, and the emitter stored past the end
        // of the frame -- stack corruption. the fix walks into call arguments
        // so the frame reserves enough scratch.
        //
        // how this test proves it: take the frame size from the function's own
        // epilogue (`ldp fp, lr, [sp], dealloc`) and require every `[fp, N]`
        // in the function to satisfy 0 <= N < dealloc.
        let asm = compile_program_ok(
            "fn id(x: i64) -> i64 { x }\n\
             fn f(a: i64) -> i64 { id(((a+1)*(a+2)) - ((a+3)*(a+4))) }",
        );
        // all of `f`, from its label until a `ret` or a blank line, so the
        // `ldp` that follows the epilogue label is part of the slice.
        let f_fn: Vec<&str> = asm
            .lines()
            .skip_while(|l| l.trim() != "f:")
            .take_while(|l| {
                let t = l.trim();
                !(t == "ret" || t.is_empty())
            })
            .collect();
        // the N in the epilogue's `ldp fp, lr, [sp], N` is the frame size.
        let dealloc: i64 = f_fn
            .iter()
            .filter_map(|l| l.trim().strip_prefix("ldp     fp, lr, [sp], "))
            .find_map(|n| n.trim().parse().ok())
            .expect("missing the epilogue `ldp` line with the frame size");
        // per the original reasoning fp == sp after `mov fp, sp`, so
        // [fp, dealloc] and beyond belongs to the caller: every fp-relative
        // access has to stay within [fp, 0]..[fp, dealloc-1].
        let accesses = f_fn.iter().filter_map(|l| fp_offset(l).map(|o| (l, o)));
        for (line, offset) in accesses {
            assert!(
                offset < dealloc,
                "`{}` writes [fp, {offset}] -- outside the {dealloc}-byte frame",
                line.trim()
            );
            assert!(offset >= 0, "`{}` has a negative fp offset", line.trim());
        }
        // guard against a vacuous pass: a deep expression has to spill x0 to
        // several different slots.
        let mut spill_slots = std::collections::BTreeSet::<i64>::new();
        for line in &f_fn {
            if !line.contains("str     x0, [fp, ") {
                continue;
            }
            if let Some(slot) = fp_offset(line) {
                spill_slots.insert(slot);
            }
        }
        assert!(
            spill_slots.len() >= 3,
            "a deep arithmetic argument must use several scratch slots: {f_fn:?}"
        );
    }
}