midenc-codegen-masm 0.7.1

Miden Assembly backend for the Miden compiler
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
use miden_core::{Felt, FieldElement};
use midenc_hir::Overflow;

use super::*;

impl OpEmitter<'_> {
    /// Truncate an integral value of type `src` to type `dst`
    ///
    /// Truncation is defined in terms of the bitwise representation of the type.
    /// Specifically, the source value will have any excess bits above the bitwidth of
    /// the `dst` type either zeroed, or dropped, depending on the `src` type. For example,
    /// u64 is represented as two 32-bit limbs, each in a field element on the operand stack;
    /// while u16 is represented as 16 bits in a single field element. Truncating from u64
    /// to u16 results in dropping the 32-bit limb containing the most significant bits, and
    /// then masking out the most significant 16 bits of the remaining 32-bit limb, leaving
    /// a 16-bit value on the operand stack.
    ///
    /// NOTE: Field elements do not have a formal bitwise representation. When truncating to
    /// felt and the source value is negative, the resulting felt will be `Felt::ZERO`. When
    /// the value is non-negative, the source value will be mapped to the field element range
    /// using the field modulus of `2^64 - 2^32 + 1`, and then convert the representation to
    /// felt by multiplying the 32-bit limbs (the only values which can be truncated to felt
    /// are u64, i64, and i128, all of which use multiple 32-bit limbs).
    ///
    /// This function assumes that an integer value of type `src` is on top of the operand stack,
    /// and will ensure a value of type `dst` is on the operand stack after truncation, or that
    /// execution traps.
    pub fn trunc(&mut self, dst: &Type, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let src = arg.ty();
        assert!(
            src.is_integer() && dst.is_integer(),
            "invalid truncation of {src} to {dst}: only integer-to-integer casts are supported"
        );
        let n = dst.size_in_bits() as u32;
        match (&src, dst) {
            // If the types are equivalent, it's a no-op
            (src, dst) if src == dst => (),
            (Type::Felt, _) if n <= 32 => self.trunc_felt(n, span),
            // Truncating i128 to u128, and vice versa is a bitcast
            (Type::I128 | Type::U128, Type::U128 | Type::I128) => (),
            // Truncating to felt
            (Type::U128 | Type::I128, Type::Felt) => self.trunc_i128_to_felt(span),
            // Truncating a 128-bit integer to 64 bits or smaller
            (Type::U128 | Type::I128, _) if n <= 64 => self.trunc_i128(n, span),
            // Truncating i64/u64 to felt
            (Type::I64 | Type::U64, Type::Felt) => self.trunc_int64_to_felt(span),
            // Truncating i64 to u64, and vice versa is a bitcast
            (Type::I64 | Type::U64, Type::U64 | Type::I64) => (),
            // Truncating a u64/i64 to 32 bits or smaller
            (Type::I64 | Type::U64, _) if n <= 32 => self.trunc_int64(n, span),
            // Truncating a felt to 32 bits or smaller
            (Type::Felt, _) if n <= 32 => self.trunc_felt(n, span),
            // Truncating i32 to u32, and vice versa is a bitcast
            (Type::I32 | Type::U32, Type::U32 | Type::I32) => (),
            // Truncating an i32/u32 to smaller than 32 bits
            (Type::I32 | Type::U32, _) if n <= 32 => self.trunc_int32(n, span),
            // Truncating i16 to u16, and vice versa is a bitcast
            (Type::I16 | Type::U16, Type::U16 | Type::I16) => (),
            // Truncating an i16/u16 to smaller than 16 bits
            (Type::I16 | Type::U16, _) if n <= 16 => self.trunc_int32(n, span),
            // Truncating i8 to u8, and vice versa is a bitcast
            (Type::I8 | Type::U8, Type::U8 | Type::I8) => (),
            // Truncating an i8/u8 to smaller than 8 bits
            (Type::I8 | Type::U8, _) if n <= 8 => self.trunc_int32(n, span),
            (src, dst) => unimplemented!("unsupported truncation of {src} to {dst}"),
        }
        self.push(dst.clone());
    }

    /// Zero-extend an unsigned integral value of type `src` to type `dst`
    ///
    /// This function will panic if the source or target types are not unsigned integral types.
    /// Despite its type name, i1 is an unsigned value, because it may only represent 1 or 0.
    ///
    /// Zero-extension is defined in terms of the bitwise representation of the type.
    /// Specifically, the source value will have any excess bits above the bitwidth of
    /// the `src` type either added as zeroes, or set to zero, depending on the `dst` type.
    /// For example, u16 is represented as 16 bits in a single field element, while u64 is
    /// represented as two 32-bit limbs, each in a separate field element. Zero-extending
    /// from u16 to u64 requires only pushing a new element of `Felt::ZERO` on the operand stack.
    /// Since the upper 16 bits of the original 32-bit field element value must already be zero,
    /// we only needed to pad out the representation with an extra zero element to obtain the
    /// corresponding u64.
    ///
    /// NOTE: Field elements do not have a formal bitwise representation. However, types with a
    /// bitwidth of 32 bits or smaller are transparently represented as field elements in the VM,
    /// so zero-extending to felt from such a type is a no-op. Even though a field element is
    /// notionally a 64-bit value in memory, it is not equivalent in range to a 64-bit integer,
    /// so 64-bit integers and above require the use of multiple 32-bit limbs, to provide a two's
    /// complement bitwise representation; so there are no types larger than 32-bits that are
    /// zero-extendable to felt, but are not representable as a felt transparently.
    ///
    /// This function assumes that an integer value of type `src` is on top of the operand stack,
    /// and will ensure a value of type `dst` is on the operand stack after truncation, or that
    /// execution traps.
    pub fn zext(&mut self, dst: &Type, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let src = arg.ty();
        let src_bits = src.size_in_bits() as u32;
        let dst_bits = dst.size_in_bits() as u32;
        assert!(
            src_bits <= dst_bits,
            "invalid zero-extension from {src} to {dst}: cannot zero-extend to a smaller type"
        );
        match (&src, dst) {
            // If the types are equivalent, it's a no-op, but only if they are integers
            (src, dst) if src == dst => (),
            // Zero-extending a u64 to i128 simply requires pushing a 0u64 on the stack
            (Type::U64, Type::U128 | Type::I128) => self.push_u64(0, span),
            (Type::Felt, Type::U64 | Type::U128 | Type::I128) => self.zext_felt(dst_bits, span),
            (Type::U32, Type::U64 | Type::I64 | Type::U128 | Type::I128) => {
                self.zext_int32(dst_bits, span)
            }
            (Type::I1 | Type::U8 | Type::U16, Type::U64 | Type::I64 | Type::U128 | Type::I128) => {
                self.zext_smallint(src_bits, dst_bits, span)
            }
            // Zero-extending to u32/i32 from smaller integers is a no-op
            (Type::I1 | Type::U8 | Type::U16, Type::U32 | Type::I32) => (),
            // Zero-extending to felt, from types that fit in felt, is a no-op
            (Type::I1 | Type::U8 | Type::U16 | Type::U32, Type::Felt) => (),
            (src, dst) if dst.is_signed_integer() => panic!(
                "invalid zero-extension from {src} to {dst}: value may not fit in range, use \
                 explicit cast instead"
            ),
            (src, dst) => panic!("unsupported zero-extension from {src} to {dst}"),
        }
        self.push(dst.clone());
    }

    /// Sign-extend an integral value of type `src` to type `dst`
    ///
    /// This function will panic if the target type is not a signed integral type.
    /// To extend unsigned integer types to a larger unsigned integer type, use `zext`.
    /// To extend signed integer types to an equal or larger unsigned type, use `cast`.
    ///
    /// Sign-extension is defined in terms of the bitwise representation of the type.
    /// Specifically, the sign bit of the source value will be propagated to all excess
    /// bits added to the representation of `src` to represent `dst`.
    ///
    /// For example, i16 is represented as 16 bits in a single field element, while i64 is
    /// represented as two 32-bit limbs, each in a separate field element. Sign-extending
    /// the i16 value -128, to i64, requires propagating the sign bit value, 1 since -128
    /// is a negative number, to the most significant 32-bits of the input element, as well
    /// as pushing an additional element representing `u32::MAX` on the operand stack. This
    /// gives us a bitwise representation where the most significant 48 bits are all 1s, and
    /// the last 16 bits are the same as the original input value, giving us the i64
    /// representation of -128.
    ///
    /// NOTE: Field elements cannot be sign-extended to i64, you must an explicit cast, as the
    /// range of the field means that it is not guaranteed that the felt will fit in the i64
    /// range.
    ///
    /// This function assumes that an integer value of type `src` is on top of the operand stack,
    /// and will ensure a value of type `dst` is on the operand stack after truncation, or that
    /// execution traps.
    pub fn sext(&mut self, dst: &Type, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let src = arg.ty();
        assert!(
            src.is_integer() && dst.is_signed_integer(),
            "invalid sign-extension of {src} to {dst}: only integer-to-signed-integer casts are \
             supported"
        );
        let src_bits = src.size_in_bits() as u32;
        let dst_bits = dst.size_in_bits() as u32;
        assert!(
            src_bits <= dst_bits,
            "invalid zero-extension from {src} to {dst}: cannot zero-extend to a smaller type"
        );
        match (&src, dst) {
            // If the types are equivalent, it's a no-op
            (src, dst) if src == dst => (),
            (Type::U64 | Type::I64, Type::I128) => self.sext_int64(128, span),
            (Type::Felt, Type::I64 | Type::I128) => self.sext_felt(dst_bits, span),
            (Type::I32 | Type::U32, Type::I64 | Type::I128) => self.sext_int32(dst_bits, span),
            (
                Type::I1 | Type::I8 | Type::U8 | Type::I16 | Type::U16,
                Type::I32 | Type::I64 | Type::I128,
            ) => self.sext_smallint(src_bits, dst_bits, span),
            (src, dst) => panic!("unsupported sign-extension from {src} to {dst}"),
        }
        self.push(dst.clone());
    }

    pub fn bitcast(&mut self, dst: &Type, _span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let src = arg.ty();
        assert!(
            (src.is_integer() && dst.is_integer()) || (src.is_pointer() && dst.is_pointer()),
            "invalid cast of {src} to {dst}: only integer-to-integer or pointer-to-pointer \
             bitcasts are supported"
        );
        self.push(dst.clone());
    }

    /// Convert between two integral types, given as `src` and `dst`,
    /// indicating the direction of the conversion.
    ///
    /// This function will panic if either type is not an integer.
    ///
    /// The specific semantics of a cast are dependent on the pair of types involved,
    /// but the general rules are as follows:
    ///
    /// * Any integer-to-integer cast is allowed
    /// * Casting a signed integer to an unsigned integer will assert that the input value is
    ///   unsigned
    /// * Casting a type with a larger range to a type with a smaller one will assert that the input
    ///   value fits within that range, e.g. u8 to i8, i16 to i8, etc.
    /// * Casting to a larger unsigned type will use zero-extension
    /// * Casting a signed type to a larger signed type will use sign-extension
    /// * Casting an unsigned type to a larger signed type will use zero-extension
    ///
    /// As a rule, the input value must be representable in the target type, or an
    /// assertion will be raised. Casts are intended to faithfully translate a value
    /// in one type to the same value in another type.
    ///
    /// This function assumes that an integer value of type `src` is on top of the operand stack,
    /// and will ensure a value of type `dst` is on the operand stack after truncation, or that
    /// execution traps.
    pub fn cast(&mut self, dst: &Type, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let src = arg.ty();
        assert!(
            src.is_integer() && dst.is_integer(),
            "invalid cast of {src} to {dst}: only integer-to-integer casts are supported"
        );

        let src_bits = src.size_in_bits() as u32;
        let dst_bits = dst.size_in_bits() as u32;
        match (&src, dst) {
            // u128
            (Type::U128, Type::I128) => self.assert_unsigned_int128(span),
            (Type::U128, Type::I64) => self.u128_to_i64(span),
            (Type::U128 | Type::I128, Type::U64) => self.int128_to_u64(span),
            (Type::U128 | Type::I128, Type::Felt) => self.int128_to_felt(span),
            (Type::U128 | Type::I128, Type::U32) => self.int128_to_u32(span),
            (Type::U128 | Type::I128, Type::U16 | Type::U8 | Type::I1) => {
                self.int128_to_u32(span);
                self.int32_to_uint(dst_bits, span);
            }
            (Type::U128, Type::I32) => {
                self.int128_to_u32(span);
                self.assert_unsigned_int32(span);
            }
            (Type::U128, Type::I16 | Type::I8) => {
                self.int128_to_u32(span);
                self.int32_to_int(dst_bits, span);
            }
            // i128
            (Type::I128, Type::I64) => self.i128_to_i64(span),
            (Type::I128, Type::I32 | Type::I16 | Type::I8) => {
                self.i128_to_i64(span);
                self.i64_to_int(dst_bits, span);
            }
            // i64
            (Type::I64, Type::I128) => self.sext_int64(128, span),
            (Type::I64, Type::U128) => self.zext_int64(128, span),
            (Type::I64, Type::U64) => self.assert_unsigned_int64(span),
            (Type::I64, Type::Felt) => self.i64_to_felt(span),
            (Type::I64, Type::U32 | Type::U16 | Type::U8 | Type::I1) => {
                self.assert_unsigned_int64(span);
                self.u64_to_uint(dst_bits, span);
            }
            (Type::I64, Type::I32 | Type::I16 | Type::I8) => {
                self.i64_to_int(dst_bits, span);
            }
            // u64
            (Type::U64, Type::I128 | Type::U128) => self.zext_int64(128, span),
            (Type::U64, Type::I64) => self.assert_i64(span),
            (Type::U64, Type::Felt) => self.u64_to_felt(span),
            (Type::U64, Type::U32 | Type::U16 | Type::U8 | Type::I1) => {
                self.u64_to_uint(dst_bits, span);
            }
            (Type::U64, Type::I32 | Type::I16 | Type::I8) => {
                // Convert to N bits as unsigned
                self.u64_to_uint(dst_bits, span);
                // Verify that the input value is still unsigned
                self.assert_unsigned_smallint(dst_bits, span);
            }
            // felt
            (Type::Felt, Type::I64 | Type::I128) => self.sext_felt(dst_bits, span),
            (Type::Felt, Type::U128) => self.zext_felt(dst_bits, span),
            (Type::Felt, Type::U64) => self.felt_to_u64(span),
            (Type::Felt, Type::U32 | Type::U16 | Type::U8 | Type::I1) => {
                self.felt_to_uint(dst_bits, span);
            }
            (Type::Felt, Type::I32 | Type::I16 | Type::I8) => {
                self.felt_to_int(dst_bits, span);
            }
            // u32
            (Type::U32, Type::I64 | Type::U64 | Type::I128) => self.zext_int32(dst_bits, span),
            (Type::U32, Type::I32) => self.assert_i32(span),
            (Type::U32, Type::U16 | Type::U8 | Type::I1) => {
                self.int32_to_uint(dst_bits, span);
            }
            (Type::U32, Type::I16 | Type::I8) => self.int32_to_int(dst_bits, span),
            // i32
            (Type::I32, Type::I64 | Type::I128) => self.sext_int32(dst_bits, span),
            (Type::I32, Type::U64) => {
                self.assert_i32(span);
                self.emit_push(0u32, span);
            }
            (Type::I32, Type::U32) => {
                self.assert_i32(span);
            }
            (Type::I32, Type::U16 | Type::U8 | Type::I1) => {
                self.int32_to_uint(dst_bits, span);
            }
            (Type::I32, Type::I16 | Type::I8) => self.int32_to_int(dst_bits, span),
            // i8/i16
            (Type::I8 | Type::I16, Type::I32 | Type::I64 | Type::I128) => {
                self.sext_smallint(src_bits, dst_bits, span);
            }
            (Type::I8 | Type::I16, Type::U32 | Type::U64) => {
                self.assert_unsigned_smallint(src_bits, span);
                self.zext_smallint(src_bits, dst_bits, span);
            }
            (Type::I16, Type::U16) | (Type::I8, Type::U8) => {
                self.assert_unsigned_smallint(src_bits, span);
            }
            (Type::I16, Type::U8 | Type::I1) => self.int32_to_int(dst_bits, span),
            (Type::I16, Type::I8) => self.int32_to_int(dst_bits, span),
            (Type::I8, Type::I1) => {
                // Assert that input is either 0 or 1
                //
                // NOTE: The comparison here is unsigned, so the sign
                // bit being set will make the i8 larger than 0 or 1
                self.emit(masm::Instruction::Dup0, span);
                self.emit_push(2u32, span);
                self.emit_all([masm::Instruction::Lt, masm::Instruction::Assert], span);
            }
            // i1
            (Type::I1, _) => self.zext_smallint(src_bits, dst_bits, span),
            (src, dst) => unimplemented!("unsupported cast from {src} to {dst}"),
        }
        self.push(dst.clone());
    }

    /// Cast `arg` to a pointer value
    pub fn inttoptr(&mut self, ty: &Type, span: SourceSpan) {
        assert!(ty.is_pointer(), "exected pointer typed argument");
        // For now, we're strict about the types of values we'll allow casting from
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            // We allow i32 here because Wasm uses it
            Type::U32 | Type::I32 => {
                self.push(ty.clone());
            }
            Type::Felt => {
                self.emit(masm::Instruction::U32Assert, span);
                self.push(ty.clone());
            }
            int => panic!("invalid inttoptr cast: cannot cast value of type {int} to {ty}"),
        }
    }

    /// Check if an integral value on the operand stack is an odd number.
    ///
    /// The result is placed on the stack as a boolean value.
    ///
    /// This operation consumes the input operand.
    pub fn is_odd(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            // For both signed and unsigned types,
            // values <= bitwidth of a felt can use
            // the native instruction because the sign
            // bit does not change whether the value is
            // odd or not
            Type::I1
            | Type::U8
            | Type::I8
            | Type::U16
            | Type::I16
            | Type::U32
            | Type::I32
            | Type::Felt => {
                self.emit(masm::Instruction::IsOdd, span);
            }
            // For i64/u64, we use the native instruction
            // on the lower limb to check for odd/even
            Type::I64 | Type::U64 => {
                self.emit_all([masm::Instruction::Drop, masm::Instruction::IsOdd], span);
            }
            // For i128, same as above, but more elements are dropped
            Type::I128 | Type::U128 => {
                self.emit_n(3, masm::Instruction::Drop, span);
                self.emit(masm::Instruction::IsOdd, span);
            }
            Type::F64 => {
                unimplemented!("is_odd support for floating-point values is not yet implemented")
            }
            ty => panic!("expected integral type for is_odd opcode, got {ty}"),
        }
        self.push(Type::I1);
    }

    /// Compute the integral base-2 logarithm of the value on top of the operand stack, and
    /// place the result back on the operand stack as a u32 value.
    ///
    /// This operation consumes the input operand.
    pub fn ilog2(&mut self, span: SourceSpan) {
        let ty = self.stack.peek().expect("operand stack is empty").ty();
        match &ty {
            Type::Felt => self.emit(masm::Instruction::ILog2, span),
            Type::I128 | Type::U128 | Type::I64 | Type::U64 => {
                // Compute the number of leading zeros
                //
                // NOTE: This function handles popping the input and pushing
                // a u32 result on the stack for us, so we can omit any stack
                // manipulation here.
                self.clz(span);
                let bits = ty.size_in_bits();
                // ilog2 is bits - clz - 1
                self.emit_push(bits as u8, span);
                self.emit_all(
                    [
                        masm::Instruction::Swap1,
                        masm::Instruction::Sub,
                        masm::Instruction::U32OverflowingSubImm(1.into()),
                        masm::Instruction::Assertz,
                    ],
                    span,
                );
            }
            Type::I32 | Type::U32 | Type::I16 | Type::U16 | Type::I8 | Type::U8 => {
                let _ = self.stack.pop();
                self.emit_all(
                    [
                        // Compute ilog2 on the advice stack
                        masm::Instruction::ILog2,
                        // Drop the operand
                        masm::Instruction::Drop,
                        // Move the result to the operand stack
                        masm::Instruction::AdvPush(1.into()),
                    ],
                    span,
                );
                self.push(Type::U32);
            }
            Type::I1 => {
                // 2^0 == 1
                let _ = self.stack.pop();
                self.emit(masm::Instruction::Drop, span);
                self.emit_push(0u8, span);
                self.push(Type::U32);
            }
            ty if !ty.is_integer() => {
                panic!("invalid ilog2 on {ty}: only integral types are supported")
            }
            ty => unimplemented!("ilog2 for {ty} is not supported"),
        }
    }

    /// Count the number of non-zero bits in the integral value on top of the operand stack,
    /// and place the count back on the stack as a u32 value.
    ///
    /// This operation consumes the input operand.
    pub fn popcnt(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            Type::I128 | Type::U128 => {
                self.emit_all(
                    [
                        // [x3, x2, x1, x0]
                        masm::Instruction::U32Popcnt,
                        // [popcnt3, x2, x1, x0]
                        masm::Instruction::Swap1,
                        // [x2, popcnt3, x1, x0]
                        masm::Instruction::U32Popcnt,
                        // [popcnt2, popcnt3, x1, x0]
                        masm::Instruction::Add,
                        // [popcnt_hi, x1, x0]
                        masm::Instruction::MovDn2,
                        // [x1, x0, popcnt]
                        masm::Instruction::U32Popcnt,
                        // [popcnt1, x0, popcnt]
                        masm::Instruction::Swap1,
                        // [x0, popcnt1, popcnt]
                        masm::Instruction::U32Popcnt,
                        // [popcnt0, popcnt1, popcnt]
                        //
                        // This last instruction adds all three values together mod 2^32
                        masm::Instruction::U32WrappingAdd3,
                    ],
                    span,
                );
            }
            Type::I64 | Type::U64 => {
                self.emit_all(
                    [
                        // Get popcnt of high bits
                        masm::Instruction::U32Popcnt,
                        // Swap to low bits and repeat
                        masm::Instruction::Swap1,
                        masm::Instruction::U32Popcnt,
                        // Add both counts to get the total count
                        masm::Instruction::Add,
                    ],
                    span,
                );
            }
            Type::I32 | Type::U32 | Type::I16 | Type::U16 | Type::I8 | Type::U8 | Type::I1 => {
                self.emit(masm::Instruction::U32Popcnt, span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid popcnt on {ty}: only integral types are supported")
            }
            ty => unimplemented!("popcnt for {ty} is not supported"),
        }
        self.push(Type::U32);
    }

    /// Count the number of leading zero bits in the integral value on top of the operand stack,
    /// and place the count back on the stack as a u32 value.
    ///
    /// This operation is implemented so that it consumes the input operand.
    pub fn clz(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            Type::I128 | Type::U128 => {
                // We decompose the 128-bit value into two 64-bit limbs, and use the standard
                // library intrinsics to get the count for those limbs. We then add the count
                // for the low bits to that of the high bits, if the high bits are all zero,
                // otherwise we take just the high bit count.
                //
                // Count leading zeros in the high bits
                self.raw_exec("::miden::core::math::u64::clz", span);
                self.emit_all(
                    [
                        // [hi_clz, lo_hi, lo_lo]
                        // Count leading zeros in the low bits
                        masm::Instruction::MovUp2, // [lo_lo, hi_clz, lo_hi]
                        masm::Instruction::MovUp2, // [lo_hi, lo_lo, hi_clz]
                    ],
                    span,
                );
                self.raw_exec("::miden::core::math::u64::clz", span); // [lo_clz, hi_clz]
                // Add the low bit leading zeros to those of the high bits, if the high
                // bits are all zeros; otherwise return only the
                // high bit count
                self.emit_push(0u32, span); // [0, lo_clz, hi_clz]
                self.emit(masm::Instruction::Dup2, span); // [hi_clz, 0, lo_clz, hi_clz]
                self.emit_push(Felt::new(32), span);
                self.emit_all(
                    [
                        masm::Instruction::Lt,    // [hi_clz < 32, 0, lo_clz, hi_clz]
                        masm::Instruction::CDrop, // [hi_clz < 32 ? 0 : lo_clz, hi_clz]
                        masm::Instruction::Add,
                    ],
                    span,
                );
            }
            Type::I64 | Type::U64 => {
                self.raw_exec("::miden::core::math::u64::clz", span);
            }
            Type::I32 | Type::U32 => {
                self.emit(masm::Instruction::U32Clz, span);
            }
            Type::I16 | Type::U16 => {
                // There are always 16 leading zeroes from the perspective of the
                // MASM u32clz instruction for values of (i|u)16 type, so subtract
                // that from the count
                self.emit_all(
                    [
                        masm::Instruction::U32Clz,
                        // Subtract the excess bits from the count
                        masm::Instruction::U32WrappingSubImm(16.into()),
                    ],
                    span,
                );
            }
            Type::I8 | Type::U8 => {
                // There are always 24 leading zeroes from the perspective of the
                // MASM u32clz instruction for values of (i|u)8 type, so subtract
                // that from the count
                self.emit_all(
                    [
                        masm::Instruction::U32Clz,
                        // Subtract the excess bits from the count
                        masm::Instruction::U32WrappingSubImm(24.into()),
                    ],
                    span,
                );
            }
            Type::I1 => {
                // There is exactly one leading zero if false, or zero if true
                self.emit(masm::Instruction::EqImm(Felt::ZERO.into()), span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid clz on {ty}: only integral types are supported")
            }
            ty => unimplemented!("clz for {ty} is not supported"),
        }
        self.push(Type::U32);
    }

    /// Count the number of leading one bits in the integral value on top of the operand stack,
    /// and place the count back on the stack as a u32 value.
    ///
    /// This operation is implemented so that it consumes the input operand.
    pub fn clo(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            // The implementation here is effectively the same as `clz`, just with minor adjustments
            Type::I128 | Type::U128 => {
                // We decompose the 128-bit value into two 64-bit limbs, and use the standard
                // library intrinsics to get the count for those limbs. We then add the count
                // for the low bits to that of the high bits, if the high bits are all one,
                // otherwise we take just the high bit count.
                //
                // Count leading ones in the high bits
                self.raw_exec("::miden::core::math::u64::clo", span); // [hi_clo, lo_hi, lo_lo]
                self.emit_all(
                    [
                        // Count leading ones in the low bits
                        masm::Instruction::MovUp2, // [lo_lo, hi_clo, lo_hi]
                        masm::Instruction::MovUp2, // [lo_hi, lo_lo, hi_clo]
                    ],
                    span,
                );
                self.raw_exec("::miden::core::math::u64::clo", span); // [lo_clo, hi_clo]
                // Add the low bit leading ones to those of the high bits, if the high bits
                // are all one; otherwise return only the high bit count
                self.emit_push(0u32, span); // [0, lo_clo, hi_clo]
                self.emit(masm::Instruction::Dup2, span); // [hi_clo, 0, lo_clo, hi_clo]
                self.emit_push(Felt::new(32), span);
                self.emit_all(
                    [
                        masm::Instruction::Lt,    // [hi_clo < 32, 0, lo_clo, hi_clo]
                        masm::Instruction::CDrop, // [hi_clo < 32 ? 0 : lo_clo, hi_clo]
                        masm::Instruction::Add,
                    ],
                    span,
                );
            }
            Type::I64 | Type::U64 => self.raw_exec("::miden::core::math::u64::clo", span),
            Type::I32 | Type::U32 => {
                self.emit(masm::Instruction::U32Clo, span);
            }
            Type::I16 | Type::U16 => {
                // There are always 16 leading zeroes from the perspective of the
                // MASM u32clo instruction for values of (i|u)16 type, so to get
                // the correct count, we need to bitwise-OR in a 16 bits of leading
                // ones, then subtract that from the final count.
                //
                // OR in the leading 16 ones
                self.emit_push(u32::MAX << 16, span);
                self.emit_all(
                    [
                        masm::Instruction::U32Or,
                        // Obtain the count
                        masm::Instruction::U32Clo,
                        // Subtract the leading bits we added from the count
                        masm::Instruction::U32WrappingSubImm(16.into()),
                    ],
                    span,
                );
            }
            Type::I8 | Type::U8 => {
                // There are always 24 leading zeroes from the perspective of the
                // MASM u32clo instruction for values of (i|u)8 type, so as with the
                // 16-bit values, we need to bitwise-OR in 24 bits of leading ones,
                // then subtract them from the final count.
                //
                // OR in the leading 24 ones
                self.emit_push(u32::MAX << 8, span);
                self.emit_all(
                    [
                        masm::Instruction::U32Or,
                        // Obtain the count
                        masm::Instruction::U32Clo,
                        // Subtract the excess bits from the count
                        masm::Instruction::U32WrappingSubImm(24.into()),
                    ],
                    span,
                );
            }
            Type::I1 => {
                // There is exactly one leading one if true, or zero if false
                self.emit(masm::Instruction::EqImm(Felt::ONE.into()), span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid clo on {ty}: only integral types are supported")
            }
            ty => unimplemented!("clo for {ty} is not supported"),
        }
        self.push(Type::U32);
    }

    /// Count the number of trailing zero bits in the integral value on top of the operand stack,
    /// and place the count back on the stack as a u32 value.
    ///
    /// This operation is implemented so that it consumes the input operand.
    pub fn ctz(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            Type::I128 | Type::U128 => {
                // We decompose the 128-bit value into two 64-bit limbs, and use the standard
                // library intrinsics to get the count for those limbs. We then add the count
                // for the low bits to that of the high bits, if the high bits are all one,
                // otherwise we take just the high bit count.
                //
                // Count trailing zeros in the high bits
                self.raw_exec("::miden::core::math::u64::ctz", span); // [hi_ctz, lo_hi, lo_lo]
                self.emit_all(
                    [
                        // Count trailing zeros in the low bits
                        masm::Instruction::MovUp2, // [lo_lo, hi_ctz, lo_hi]
                        masm::Instruction::MovUp2, // [lo_hi, lo_lo, hi_ctz]
                    ],
                    span,
                );
                self.raw_exec("::miden::core::math::u64::ctz", span); // [lo_ctz, hi_ctz]
                // Add the high bit trailing zeros to those of the low bits, if the low
                // bits are all zero; otherwise return only the low
                // bit count
                self.emit(masm::Instruction::Swap1, span);
                self.emit_push(0u32, span); // [0, hi_ctz, lo_ctz]
                self.emit(masm::Instruction::Dup2, span); // [lo_ctz, 0, hi_ctz, lo_ctz]
                self.emit_push(Felt::new(32), span);
                self.emit_all(
                    [
                        masm::Instruction::Lt,    // [lo_ctz < 32, 0, hi_ctz, lo_ctz]
                        masm::Instruction::CDrop, // [lo_ctz < 32 ? 0 : hi_ctz, lo_ctz]
                        masm::Instruction::Add,
                    ],
                    span,
                );
            }
            Type::I64 | Type::U64 => self.raw_exec("::miden::core::math::u64::ctz", span),
            Type::I32 | Type::U32 => self.emit(masm::Instruction::U32Ctz, span),
            Type::I16 | Type::U16 => {
                // Clamp the total number of trailing zeros to 16

                // Obtain the count
                self.emit(masm::Instruction::U32Ctz, span);
                // Clamp to 16
                //   operand_stack: [16, ctz]
                self.emit_push(16u8, span);
                //   operand_stack: [ctz, 16, ctz]
                self.emit(masm::Instruction::Dup1, span);
                //   operand_stack: [ctz >= 16, 16, ctz]
                self.emit_push(Felt::new(16), span);
                self.emit_all(
                    [
                        masm::Instruction::Gte,
                        //   operand_stack: [actual_ctz]
                        masm::Instruction::CDrop,
                    ],
                    span,
                );
            }
            Type::I8 | Type::U8 => {
                // Clamp the total number of trailing zeros to 8

                // Obtain the count
                self.emit(masm::Instruction::U32Ctz, span);
                // Clamp to 8
                //   operand_stack: [8, ctz]
                self.emit_push(8u8, span);
                //   operand_stack: [ctz, 8, ctz]
                self.emit(masm::Instruction::Dup1, span);
                //   operand_stack: [ctz >= 8, 8, ctz]
                self.emit_push(Felt::new(8), span);
                self.emit_all(
                    [
                        masm::Instruction::Gte,
                        //   operand_stack: [actual_ctz]
                        masm::Instruction::CDrop,
                    ],
                    span,
                );
            }
            Type::I1 => {
                // There is exactly one trailing zero if false, or zero if true
                self.emit(masm::Instruction::EqImm(Felt::ZERO.into()), span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid ctz on {ty}: only integral types are supported")
            }
            ty => unimplemented!("ctz for {ty} is not supported"),
        }
        self.push(Type::U32);
    }

    /// Count the number of trailing one bits in the integral value on top of the operand stack,
    /// and place the count back on the stack as a u32 value.
    ///
    /// This operation is implemented so that it consumes the input operand.
    pub fn cto(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        match arg.ty() {
            Type::I128 | Type::U128 => {
                // We decompose the 128-bit value into two 64-bit limbs, and use the standard
                // library intrinsics to get the count for those limbs. We then add the count
                // for the low bits to that of the high bits, if the high bits are all one,
                // otherwise we take just the high bit count.
                //
                // Count trailing ones in the high bits
                self.raw_exec("::miden::core::math::u64::cto", span); // [hi_cto, lo_hi, lo_lo]
                self.emit_all(
                    [
                        // Count trailing ones in the low bits
                        masm::Instruction::MovUp2, // [lo_lo, hi_cto, lo_hi]
                        masm::Instruction::MovUp2, // [lo_hi, lo_lo, hi_cto]
                    ],
                    span,
                );
                self.raw_exec("::miden::core::math::u64::cto", span); // [lo_cto, hi_cto]
                // Add the high bit trailing ones to those of the low bits, if the low bits
                // are all one; otherwise return only the low bit count
                self.emit(masm::Instruction::Swap1, span);
                self.emit_push(0u32, span); // [0, hi_cto, lo_cto]
                self.emit(masm::Instruction::Dup2, span); // [lo_cto, 0, hi_cto, lo_cto]
                self.emit_push(Felt::new(32), span);
                self.emit_all(
                    [
                        masm::Instruction::Lt,    // [lo_cto < 32, 0, hi_cto, lo_cto]
                        masm::Instruction::CDrop, // [lo_cto < 32 ? 0 : hi_cto, lo_cto]
                        masm::Instruction::Add,
                    ],
                    span,
                );
            }
            Type::I64 | Type::U64 => self.raw_exec("::miden::core::math::u64::cto", span),
            Type::I32 | Type::U32 | Type::I16 | Type::U16 | Type::I8 | Type::U8 => {
                // The number of trailing ones is de-facto clamped by the bitwidth of
                // the value, since all of the padding bits are leading zeros.
                self.emit(masm::Instruction::U32Cto, span)
            }
            Type::I1 => {
                // There is exactly one trailing one if true, or zero if false
                self.emit(masm::Instruction::EqImm(Felt::ONE.into()), span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid cto on {ty}: only integral types are supported")
            }
            ty => unimplemented!("cto for {ty} is not supported"),
        }
        self.push(Type::U32);
    }

    /// Invert the bitwise representation of the integral value on top of the operand stack.
    ///
    /// This has the effect of changing all 1 bits to 0s, and all 0 bits to 1s.
    ///
    /// This operation consumes the input operand.
    pub fn bnot(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let ty = arg.ty();
        match &ty {
            Type::I1 => self.emit(masm::Instruction::Not, span),
            Type::I8
            | Type::U8
            | Type::I16
            | Type::U16
            | Type::I32
            | Type::U32
            | Type::I64
            | Type::U64
            | Type::I128
            | Type::U128 => {
                let num_elements = ty.size_in_bits() / 32;
                match num_elements {
                    0 | 1 => {
                        self.emit(masm::Instruction::U32Not, span);
                    }
                    2 => {
                        self.emit_repeat(
                            2,
                            &[
                                Span::new(span, masm::Instruction::Swap1),
                                Span::new(span, masm::Instruction::U32Not),
                            ],
                        );
                    }
                    n => {
                        self.emit_template(n, |_| {
                            [
                                Span::new(span, movup_from_offset(n)),
                                Span::new(span, masm::Instruction::U32Not),
                            ]
                        });
                    }
                }
            }
            ty if !ty.is_integer() => {
                panic!("invalid bnot on {ty}, only integral types are supported")
            }
            ty => unimplemented!("bnot for {ty} is not supported"),
        }
        self.push(ty);
    }

    /// Invert the boolean value on top of the operand stack.
    ///
    /// This operation consumes the input operand.
    pub fn not(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        assert_eq!(arg.ty(), Type::I1, "logical NOT requires a boolean value");
        self.emit(masm::Instruction::Not, span);
        self.push(Type::I1);
    }

    /// Compute 2^N, where N is the integral value on top of the operand stack, as
    /// a value of the same type as the input.
    ///
    /// The input value must be < 64, or execution will trap.
    ///
    /// This operation consumes the input operand.
    pub fn pow2(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let ty = arg.ty();
        match &ty {
            Type::U64 => {
                self.emit_all(
                    [
                        // Assert that the high bits are zero
                        masm::Instruction::Assertz,
                        // This asserts if value > 63, thus result is guaranteed to fit in u64
                        masm::Instruction::Pow2,
                        // Obtain the u64 representation by splitting the felt result
                        masm::Instruction::U32Split,
                    ],
                    span,
                );
            }
            Type::I64 => {
                self.raw_exec("::intrinsics::i64::pow2", span);
            }
            Type::Felt => {
                self.emit(masm::Instruction::Pow2, span);
            }
            Type::U32 => {
                self.emit_all([masm::Instruction::Pow2, masm::Instruction::U32Assert], span);
            }
            Type::I32 => {
                self.raw_exec("::intrinsics::i32::pow2", span);
            }
            Type::U8 | Type::U16 => {
                self.emit_all([masm::Instruction::Pow2, masm::Instruction::U32Assert], span);
                // Cast u32 to u8/u16
                self.int32_to_uint(ty.size_in_bits() as u32, span);
            }
            ty if !ty.is_unsigned_integer() => {
                panic!(
                    "invalid unary operand: pow2 only permits unsigned integer operands, got {ty}"
                )
            }
            ty => unimplemented!("pow2 for {ty} is not supported"),
        }
        self.push(ty);
    }

    /// Increment the operand on top of the stack by 1.
    ///
    /// The input value must be an integer, and overflow has wrapping semantics.
    ///
    /// This operation consumes the input operand.
    pub fn incr(&mut self, span: SourceSpan) {
        let arg = self.stack.pop().expect("operand stack is empty");
        let ty = arg.ty();
        match &ty {
            // For this specific case, wrapping u64 arithmetic works for both i64/u64
            Type::I64 | Type::U64 => {
                self.push_u64(1, span);
                self.add_u64(Overflow::Wrapping, span);
            }
            Type::Felt => {
                self.emit(masm::Instruction::Incr, span);
            }
            // For this specific case, wrapping u32 arithmetic works for both i32/u32
            Type::I32 | Type::U32 => {
                self.add_imm_u32(1, Overflow::Wrapping, span);
            }
            // We need to wrap the result for smallint types
            Type::I8 | Type::U8 | Type::I16 | Type::U16 => {
                let bits = ty.size_in_bits() as u32;
                self.add_imm_u32(1, Overflow::Wrapping, span);
                self.unchecked_mod_imm_u32(2u32.pow(bits), span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid unary operand: incr requires an integer operand, got {ty}")
            }
            ty => unimplemented!("incr for {ty} is not supported"),
        }
        self.push(ty);
    }

    /// Compute the modular multiplicative inverse of the operand on top of the stack, `n`, i.e.
    /// `n^-1 mod P`.
    ///
    /// This operation consumes the input operand.
    pub fn inv(&mut self, span: SourceSpan) {
        let arg = self.pop().expect("operand stack is empty");
        let ty = arg.ty();
        match &ty {
            Type::Felt => {
                self.emit(masm::Instruction::Inv, span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid unary operand: inv requires an integer, got {ty}")
            }
            ty => unimplemented!("inv for {ty} is not supported"),
        }
        self.push(ty);
    }

    /// Compute the modular negation of the operand on top of the stack, `n`, i.e. `-n mod P`.
    ///
    /// This operation consumes the input operand.
    pub fn neg(&mut self, span: SourceSpan) {
        let arg = self.pop().expect("operand stack is empty");
        let ty = arg.ty();
        match &ty {
            Type::Felt => {
                self.emit(masm::Instruction::Neg, span);
            }
            ty if !ty.is_integer() => {
                panic!("invalid unary operand: neg requires an integer, got {ty}")
            }
            ty => unimplemented!("neg for {ty} is not supported"),
        }
        self.push(ty);
    }
}