cranelift-codegen 0.95.1

Low-level code generator library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
;; Algebraic optimizations.

;; Rules here are allowed to rewrite pure expressions arbitrarily,
;; using the same inputs as the original, or fewer. In other words, we
;; cannot pull a new eclass id out of thin air and refer to it, other
;; than a piece of the input or a new node that we construct; but we
;; can freely rewrite e.g. `x+y-y` to `x`.

;; Chained `uextend` and `sextend`.
(rule (simplify (uextend ty (uextend _intermediate_ty x)))
      (uextend ty x))
(rule (simplify (sextend ty (sextend _intermediate_ty x)))
      (sextend ty x))

;; x+0 == 0+x == x.
(rule (simplify (iadd ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (iadd ty
                      (iconst ty (u64_from_imm64 0))
                      x))
      (subsume x))
;; x-0 == x.
(rule (simplify (isub ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
;; 0-x == (ineg x).
(rule (simplify (isub ty
                      (iconst ty (u64_from_imm64 0))
                      x))
      (ineg ty x))

;; ineg(ineg(x)) == x.
(rule (simplify (ineg ty (ineg ty x))) (subsume x))

;; ineg(x) * ineg(y) == x*y.
(rule (simplify (imul ty (ineg ty x) (ineg ty y)))
      (subsume (imul ty x y)))

;; iabs(ineg(x)) == iabs(x).
(rule (simplify (iabs ty (ineg ty x)))
      (iabs ty x))

;; iabs(iabs(x)) == iabs(x).
(rule (simplify (iabs ty inner @ (iabs ty x)))
      (subsume inner))

;; x-x == 0.
(rule (simplify (isub (fits_in_64 (ty_int ty)) x x)) (subsume (iconst ty (imm64 0))))

;; x*1 == 1*x == x.
(rule (simplify (imul ty
                      x
                      (iconst ty (u64_from_imm64 1))))
      (subsume x))
(rule (simplify (imul ty
                      (iconst ty (u64_from_imm64 1))
                      x))
      (subsume x))

;; x*0 == 0*x == 0.
(rule (simplify (imul ty
                      _
                      zero @ (iconst ty (u64_from_imm64 0))))
      (subsume zero))
(rule (simplify (imul ty
                      zero @ (iconst ty (u64_from_imm64 0))
                      _))
      (subsume zero))

;; x*-1 == -1*x == ineg(x).
(rule (simplify (imul ty x (iconst ty c)))
      (if-let -1 (i64_sextend_imm64 ty c))
      (ineg ty x))
(rule (simplify (imul ty (iconst ty c) x))
      (if-let -1 (i64_sextend_imm64 ty c))
      (ineg ty x))

;; x/1 == x.
(rule (simplify (sdiv ty
                      x
                      (iconst ty (u64_from_imm64 1))))
      (subsume x))
(rule (simplify (udiv ty
                      x
                      (iconst ty (u64_from_imm64 1))))
      (subsume x))

;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x.
(rule (simplify (ishl ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (ushr ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (sshr ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (rotr ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (rotl ty
                      x
                      (iconst ty (u64_from_imm64 0))))
      (subsume x))

;; x | 0 == 0 | x == x | x == x.
(rule (simplify (bor ty
                     x
                     (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (bor ty
                     (iconst ty (u64_from_imm64 0))
                     x))
      (subsume x))
(rule (simplify (bor ty x x))
      (subsume x))

;; x ^ 0 == 0 ^ x == x.
(rule (simplify (bxor ty
                     x
                     (iconst ty (u64_from_imm64 0))))
      (subsume x))
(rule (simplify (bxor ty
                     (iconst ty (u64_from_imm64 0))
                     x))
      (subsume x))

;; x ^ x == 0.
(rule (simplify (bxor (fits_in_64 (ty_int ty)) x x))
      (subsume (iconst ty (imm64 0))))

;; x ^ not(x) == not(x) ^ x == x | not(x) == not(x) | x == -1.
;; This identity also holds for non-integer types, vectors, and wider types.
;; But `iconst` is only valid for integers up to 64 bits wide.
(rule (simplify (bxor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
(rule (simplify (bxor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))
(rule (simplify (bor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
(rule (simplify (bor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))

;; x & -1 == -1 & x == x & x == x.
(rule (simplify (band ty x x)) (subsume x))
(rule (simplify (band ty x (iconst ty k)))
      (if-let -1 (i64_sextend_imm64 ty k))
      (subsume x))
(rule (simplify (band ty (iconst ty k) x))
      (if-let -1 (i64_sextend_imm64 ty k))
      (subsume x))

;; x & 0 == 0 & x == x & not(x) == not(x) & x == 0.
(rule (simplify (band ty _ zero @ (iconst ty (u64_from_imm64 0)))) (subsume zero))
(rule (simplify (band ty zero @ (iconst ty (u64_from_imm64 0)) _)) (subsume zero))
(rule (simplify (band (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 0))))
(rule (simplify (band (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 0))))

;; not(not(x)) == x.
(rule (simplify (bnot ty (bnot ty x))) (subsume x))

;; DeMorgan's rule (two versions):
;; bnot(bor(x, y)) == band(bnot(x), bnot(y))
(rule (simplify (bnot ty (bor ty x y)))
      (band ty (bnot ty x) (bnot ty y)))
;; bnot(band(x, y)) == bor(bnot(x), bnot(y))
(rule (simplify (bnot ty (band t x y)))
      (bor ty (bnot ty x) (bnot ty y)))

;; `or(and(x, y), not(y)) == or(x, not(y))`
(rule (simplify (bor ty
                     (band ty x y)
                     z @ (bnot ty y)))
      (bor ty x z))
;; Duplicate the rule but swap the `bor` operands because `bor` is
;; commutative. We could, of course, add a `simplify` rule to do the commutative
;; swap for all `bor`s but this will bloat the e-graph with many e-nodes. It is
;; cheaper to have additional rules, rather than additional e-nodes, because we
;; amortize their cost via ISLE's smart codegen.
(rule (simplify (bor ty
                     z @ (bnot ty y)
                     (band ty x y)))
      (bor ty x z))

;; `or(and(x, y), not(y)) == or(x, not(y))` specialized for constants, since
;; otherwise we may not know that `z == not(y)` since we don't generally expand
;; constants in the e-graph.
;;
;; (No need to duplicate for commutative `bor` for this constant version because
;; we move constants to the right.)
(rule (simplify (bor ty
                     (band ty x (iconst ty (u64_from_imm64 y)))
                     z @ (iconst ty (u64_from_imm64 zk))))
      (if-let $true (u64_eq (u64_and (ty_mask ty) zk)
                            (u64_and (ty_mask ty) (u64_not y))))
      (bor ty x z))

;; x*2 == 2*x == x+x.
(rule (simplify (imul ty x (iconst _ (simm32 2))))
      (iadd ty x x))
(rule (simplify (imul ty (iconst _ (simm32 2)) x))
      (iadd ty x x))

;; x*c == x<<log2(c) when c is a power of two.
;; Note that the type of `iconst` must be the same as the type of `imul`,
;; so these rules can only fire in situations where it's safe to construct an
;; `iconst` of that type.
(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c))))
      (ishl ty x (iconst ty (imm64 c))))
(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x))
      (ishl ty x (iconst ty (imm64 c))))

;; TODO: strength reduction: div to shifts
;; TODO: div/rem by constants -> magic multiplications


;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
;; this is a signed or unsigned shift right).
(rule (simplify (ishl (fits_in_64 ty)
                      (ushr ty x (iconst _ k))
                      (iconst _ k)))
      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
        (band ty x (iconst ty mask))))
(rule (simplify (ishl (fits_in_64 ty)
                      (sshr ty x (iconst _ k))
                      (iconst _ k)))
      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
        (band ty x (iconst ty mask))))


;; For unsigned shifts, `(x << k) >> k` is the same as masking out the top
;; `k` bits. A similar rule is valid for vectors but this `iconst` mask only
;; works for scalar integers.
(rule (simplify (ushr (fits_in_64 (ty_int ty))
                      (ishl ty x (iconst _ k))
                      (iconst _ k)))
      (band ty x (iconst ty (imm64_ushr ty (imm64 (ty_mask ty)) k))))


;; For signed shifts, `(x << k) >> k` does sign-extension from `n` bits to
;; `n+k` bits. In the special case where `x` is the result of either `sextend`
;; or `uextend` from `n` bits to `n+k` bits, we can implement this using
;; `sextend`.
(rule (simplify (sshr wide
                 (ishl wide
                  (uextend wide x @ (value_type narrow))
                  (iconst _ shift))
                 (iconst _ shift)))
      (if-let (u64_from_imm64 shift_u64) shift)
      (if-let $true (u64_eq shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
      (sextend wide x))

;; If `k` is smaller than the difference in bit widths of the two types, then
;; the intermediate sign bit comes from the extend op, so the final result is
;; the same as the original extend op.
(rule (simplify (sshr wide
                 (ishl wide
                  x @ (uextend wide (value_type narrow))
                  (iconst _ shift))
                 (iconst _ shift)))
      (if-let (u64_from_imm64 shift_u64) shift)
      (if-let $true (u64_lt shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
      x)

;; If the original extend op was `sextend`, then both of the above cases say
;; the result should also be `sextend`.
(rule (simplify (sshr wide
                 (ishl wide
                  x @ (sextend wide (value_type narrow))
                  (iconst _ shift))
                 (iconst _ shift)))
      (if-let (u64_from_imm64 shift_u64) shift)
      (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
      x)


;; Masking out any of the top bits of the result of `uextend` is a no-op. (This
;; is like a cheap version of known-bits analysis.)
(rule (simplify (band wide x @ (uextend _ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
      ; Check that `narrow_mask` has a subset of the bits that `mask` does.
      (if-let $true (let ((narrow_mask u64 (ty_mask narrow))) (u64_eq narrow_mask (u64_and mask narrow_mask))))
      x)

;; Masking out the sign-extended bits of an `sextend` turns it into a `uextend`.
(rule (simplify (band wide (sextend _ x @ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
      (if-let $true (u64_eq mask (ty_mask narrow)))
      (uextend wide x))


;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
;; used. This is neutral (add-with-imm) or positive (iconst) for
;; register pressure, and these ops are very cheap.
(rule (simplify x @ (iadd _ (iconst _ _) _))
      (remat x))
(rule (simplify x @ (iadd _ _ (iconst _ _)))
      (remat x))
(rule (simplify x @ (isub _ (iconst _ _) _))
      (remat x))
(rule (simplify x @ (isub _ _ (iconst _ _)))
      (remat x))
(rule (simplify x @ (band _ (iconst _ _) _))
      (remat x))
(rule (simplify x @ (band _ _ (iconst _ _)))
      (remat x))
(rule (simplify x @ (bor _ (iconst _ _) _))
      (remat x))
(rule (simplify x @ (bor _ _ (iconst _ _)))
      (remat x))
(rule (simplify x @ (bxor _ (iconst _ _) _))
      (remat x))
(rule (simplify x @ (bxor _ _ (iconst _ _)))
      (remat x))
(rule (simplify x @ (bnot _ _))
      (remat x))
(rule (simplify x @ (iconst _ _))
      (remat x))
(rule (simplify x @ (f32const _ _))
      (remat x))
(rule (simplify x @ (f64const _ _))
      (remat x))

;; (x ^ -1) can be replaced with the `bnot` instruction
(rule (simplify (bxor ty x (iconst ty k)))
  (if-let -1 (i64_sextend_imm64 ty k))
  (bnot ty x))

;; 32-bit integers zero-extended to 64-bit integers are never negative
(rule (simplify
       (slt ty
             (uextend $I64 x @ (value_type $I32))
             (iconst _ (u64_from_imm64 0))))
      (iconst ty (imm64 0)))
(rule (simplify
       (sge ty
             (uextend $I64 x @ (value_type $I32))
             (iconst _ (u64_from_imm64 0))))
      (iconst ty (imm64 1)))

;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
(rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y))
(rule (simplify (select ty (sge _ x y) x y)) (smax ty x y))
(rule (simplify (select ty (ugt _ x y) x y)) (umax ty x y))
(rule (simplify (select ty (uge _ x y) x y)) (umax ty x y))
(rule (simplify (select ty (slt _ x y) x y)) (smin ty x y))
(rule (simplify (select ty (sle _ x y) x y)) (smin ty x y))
(rule (simplify (select ty (ult _ x y) x y)) (umin ty x y))
(rule (simplify (select ty (ule _ x y) x y)) (umin ty x y))

;; These are the same rules as above, but when the operands for select are swapped
(rule (simplify (select ty (slt _ x y) y x)) (smax ty x y))
(rule (simplify (select ty (sle _ x y) y x)) (smax ty x y))
(rule (simplify (select ty (ult _ x y) y x)) (umax ty x y))
(rule (simplify (select ty (ule _ x y) y x)) (umax ty x y))
(rule (simplify (select ty (sgt _ x y) y x)) (smin ty x y))
(rule (simplify (select ty (sge _ x y) y x)) (smin ty x y))
(rule (simplify (select ty (ugt _ x y) y x)) (umin ty x y))
(rule (simplify (select ty (uge _ x y) y x)) (umin ty x y))

;; Transform bitselect-of-icmp into {u,s}{min,max} instructions where possible.
(rule (simplify (bitselect ty @ (multi_lane _ _) (sgt _ x y) x y)) (smax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (sge _ x y) x y)) (smax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ugt _ x y) x y)) (umax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (uge _ x y) x y)) (umax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (slt _ x y) x y)) (smin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (sle _ x y) x y)) (smin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ult _ x y) x y)) (umin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ule _ x y) x y)) (umin ty x y))

;; These are the same rules as above, but when the operands for select are swapped
(rule (simplify (bitselect ty @ (multi_lane _ _) (slt _ x y) y x)) (smax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (sle _ x y) y x)) (smax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ult _ x y) y x)) (umax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ule _ x y) y x)) (umax ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (sgt _ x y) y x)) (smin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (sge _ x y) y x)) (smin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (ugt _ x y) y x)) (umin ty x y))
(rule (simplify (bitselect ty @ (multi_lane _ _) (uge _ x y) y x)) (umin ty x y))

;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
;;
;; fmax_pseudo docs state:
;; The behaviour for this operations is defined as  fmax_pseudo(a, b) = (a < b) ? b : a, and the behaviour for zero
;; or NaN inputs follows from the behaviour of < with such inputs.
;;
;; That is exactly the operation that we match here!
(rule (simplify
       (select ty (fcmp _ (FloatCC.LessThan) x y) x y))
      (fmin_pseudo ty x y))
(rule (simplify
       (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
      (fmax_pseudo ty x y))

;; TODO: perform this same optimization to `f{min,max}_pseudo` for vectors
;; with the `bitselect` instruction, but the pattern is a bit more complicated
;; due to most bitselects-over-floats having bitcasts.

;; fneg(fneg(x)) == x.
(rule (simplify (fneg ty (fneg ty x))) (subsume x))

;; If both of the multiplied arguments to an `fma` are negated then remove
;; both of them since they cancel out.
(rule (simplify (fma ty (fneg ty x) (fneg ty y) z))
      (fma ty x y z))

;; If both of the multiplied arguments to an `fmul` are negated then remove
;; both of them since they cancel out.
(rule (simplify (fmul ty (fneg ty x) (fneg ty y)))
      (fmul ty x y))