regalloc/data_structures.rs
1//! Data structures for the whole crate.
2
3use rustc_hash::FxHashMap;
4use rustc_hash::FxHashSet;
5use smallvec::SmallVec;
6
7use std::cmp::Ordering;
8use std::collections::VecDeque;
9use std::fmt;
10use std::hash::Hash;
11use std::marker::PhantomData;
12use std::ops::{Deref, DerefMut, Index, IndexMut};
13use std::slice::{Iter, IterMut};
14
15use crate::{Function, RegUsageMapper};
16
17#[cfg(feature = "enable-serde")]
18use serde::{Deserialize, Serialize};
19
20//=============================================================================
21// Queues
22
23pub type Queue<T> = VecDeque<T>;
24
25//=============================================================================
26// Maps
27
28// NOTE: plain HashMap is nondeterministic, even in a single-threaded
29// scenario, which can make debugging code that uses it really confusing. So
30// we use FxHashMap instead, as it *is* deterministic, and, allegedly, faster
31// too.
32pub type Map<K, V> = FxHashMap<K, V>;
33
34//=============================================================================
35// Sets of things
36
37// Same comment as above for FxHashMap.
38#[derive(Clone)]
39#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
40pub struct Set<T: Eq + Hash> {
41 set: FxHashSet<T>,
42}
43
44impl<T: Eq + Ord + Hash + Copy + fmt::Debug> Set<T> {
45 #[inline(never)]
46 pub fn empty() -> Self {
47 Self {
48 set: FxHashSet::<T>::default(),
49 }
50 }
51
52 #[inline(never)]
53 pub fn unit(item: T) -> Self {
54 let mut s = Self::empty();
55 s.insert(item);
56 s
57 }
58
59 #[inline(never)]
60 pub fn two(item1: T, item2: T) -> Self {
61 let mut s = Self::empty();
62 s.insert(item1);
63 s.insert(item2);
64 s
65 }
66
67 #[inline(never)]
68 pub fn card(&self) -> usize {
69 self.set.len()
70 }
71
72 #[inline(never)]
73 pub fn insert(&mut self, item: T) {
74 self.set.insert(item);
75 }
76
77 #[inline(never)]
78 pub fn delete(&mut self, item: T) {
79 self.set.remove(&item);
80 }
81
82 #[inline(never)]
83 pub fn is_empty(&self) -> bool {
84 self.set.is_empty()
85 }
86
87 #[inline(never)]
88 pub fn contains(&self, item: T) -> bool {
89 self.set.contains(&item)
90 }
91
92 #[inline(never)]
93 pub fn intersect(&mut self, other: &Self) {
94 let mut res = FxHashSet::<T>::default();
95 for item in self.set.iter() {
96 if other.set.contains(item) {
97 res.insert(*item);
98 }
99 }
100 self.set = res;
101 }
102
103 #[inline(never)]
104 pub fn union(&mut self, other: &Self) {
105 for item in other.set.iter() {
106 self.set.insert(*item);
107 }
108 }
109
110 #[inline(never)]
111 pub fn remove(&mut self, other: &Self) {
112 for item in other.set.iter() {
113 self.set.remove(item);
114 }
115 }
116
117 #[inline(never)]
118 pub fn intersects(&self, other: &Self) -> bool {
119 !self.set.is_disjoint(&other.set)
120 }
121
122 #[inline(never)]
123 pub fn is_subset_of(&self, other: &Self) -> bool {
124 self.set.is_subset(&other.set)
125 }
126
127 #[inline(never)]
128 pub fn to_vec(&self) -> Vec<T> {
129 let mut res = Vec::<T>::new();
130 for item in self.set.iter() {
131 res.push(*item)
132 }
// Don't delete this sort. It makes the returned order deterministic, which callers rely on.
134 res.sort_unstable();
135 res
136 }
137
138 #[inline(never)]
139 pub fn from_vec(vec: Vec<T>) -> Self {
140 let mut res = Set::<T>::empty();
141 for x in vec {
142 res.insert(x);
143 }
144 res
145 }
146
147 #[inline(never)]
148 pub fn equals(&self, other: &Self) -> bool {
149 self.set == other.set
150 }
151
152 #[inline(never)]
153 pub fn retain<F>(&mut self, f: F)
154 where
155 F: FnMut(&T) -> bool,
156 {
157 self.set.retain(f)
158 }
159
160 #[inline(never)]
161 pub fn map<F, U>(&self, f: F) -> Set<U>
162 where
163 F: Fn(&T) -> U,
164 U: Eq + Ord + Hash + Copy + fmt::Debug,
165 {
166 Set {
167 set: self.set.iter().map(f).collect(),
168 }
169 }
170
171 #[inline(never)]
172 pub fn filter_map<F, U>(&self, f: F) -> Set<U>
173 where
174 F: Fn(&T) -> Option<U>,
175 U: Eq + Ord + Hash + Copy + fmt::Debug,
176 {
177 Set {
178 set: self.set.iter().filter_map(f).collect(),
179 }
180 }
181
182 pub fn clear(&mut self) {
183 self.set.clear();
184 }
185}
186
187impl<T: Eq + Ord + Hash + Copy + fmt::Debug> fmt::Debug for Set<T> {
188 #[inline(never)]
189 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
190 // Print the elements in some way which depends only on what is
191 // present in the set, and not on any other factor. In particular,
// <Debug for FxHashSet> has been observed to print the elements
// of a two-element set in both orders on different occasions.
194 let sorted_vec = self.to_vec();
195 let mut s = "{".to_string();
196 for i in 0..sorted_vec.len() {
197 if i > 0 {
198 s = s + &", ".to_string();
199 }
200 s = s + &format!("{:?}", &sorted_vec[i]);
201 }
202 s = s + &"}".to_string();
203 write!(fmt, "{}", s)
204 }
205}
206
207pub struct SetIter<'a, T> {
208 set_iter: std::collections::hash_set::Iter<'a, T>,
209}
210impl<T: Eq + Hash> Set<T> {
211 pub fn iter(&self) -> SetIter<T> {
212 SetIter {
213 set_iter: self.set.iter(),
214 }
215 }
216}
217impl<'a, T> Iterator for SetIter<'a, T> {
218 type Item = &'a T;
219 fn next(&mut self) -> Option<Self::Item> {
220 self.set_iter.next()
221 }
222}
223
224//=============================================================================
225// Iteration boilerplate for entities. The only purpose of this is to support
226// constructions of the form
227//
228// for ent in startEnt .dotdot( endPlus1Ent ) {
229// }
230//
231// until such time as `trait Step` is available in stable Rust. At that point
232// `fn dotdot` and all of the following can be removed, and the loops
233// rewritten using the standard syntax:
234//
235// for ent in startEnt .. endPlus1Ent {
236// }
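// For example, once `BlockIx` (defined further down) has had its boilerplate
// generated, a loop over the first `num_blocks` blocks can be written as
// follows (a small illustration; `num_blocks` is whatever block count applies):
//
//    for bix in BlockIx::new(0) .dotdot( BlockIx::new(num_blocks) ) {
//        ...
//    }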
237
238pub trait Zero {
239 fn zero() -> Self;
240}
241
242pub trait PlusN {
243 fn plus_n(&self, n: usize) -> Self;
244}
245
246#[derive(Clone, Copy)]
247#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
248pub struct Range<T> {
249 first: T,
250 len: usize,
251}
252
253impl<T: Copy + PartialOrd + PlusN> IntoIterator for Range<T> {
254 type Item = T;
255 type IntoIter = MyIterator<T>;
256 fn into_iter(self) -> Self::IntoIter {
257 MyIterator {
258 range: self,
259 next: self.first,
260 }
261 }
262}
263
264impl<T: Copy + Eq + Ord + PlusN> Range<T> {
265 /// Create a new range object.
266 pub fn new(from: T, len: usize) -> Range<T> {
267 Range { first: from, len }
268 }
269
270 pub fn start(&self) -> T {
271 self.first
272 }
273
274 pub fn first(&self) -> T {
275 assert!(self.len() > 0);
276 self.start()
277 }
278
279 pub fn last(&self) -> T {
280 assert!(self.len() > 0);
281 self.start().plus_n(self.len() - 1)
282 }
283
284 pub fn last_plus1(&self) -> T {
285 self.start().plus_n(self.len())
286 }
287
288 pub fn len(&self) -> usize {
289 self.len
290 }
291
292 pub fn contains(&self, t: T) -> bool {
293 t >= self.first && t < self.first.plus_n(self.len)
294 }
295}
296
297pub struct MyIterator<T> {
298 range: Range<T>,
299 next: T,
300}
301impl<T: Copy + PartialOrd + PlusN> Iterator for MyIterator<T> {
302 type Item = T;
303 fn next(&mut self) -> Option<Self::Item> {
304 if self.next >= self.range.first.plus_n(self.range.len) {
305 None
306 } else {
307 let res = Some(self.next);
308 self.next = self.next.plus_n(1);
309 res
310 }
311 }
312}
313
314//=============================================================================
315// Vectors where both the index and element types can be specified (and at
316// most 2^32-1 elems can be stored. What if this overflows?)
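// For example (a small illustrative sketch), a per-block counter table can be
// held as a TypedIxVec<BlockIx, u32>, so that it can only be indexed with a
// BlockIx and never with some unrelated index type:
//
//    let mut counts = TypedIxVec::<BlockIx, u32>::new();
//    counts.push(0);                  // entry for block b0
//    counts[BlockIx::new(0)] += 1;    // indexed by a BlockIx, not a raw usize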
317
318pub struct TypedIxVec<TyIx, Ty> {
319 vek: Vec<Ty>,
320 ty_ix: PhantomData<TyIx>,
321}
322
323impl<TyIx, Ty> TypedIxVec<TyIx, Ty>
324where
325 Ty: Clone,
326 TyIx: Copy + Eq + Ord + Zero + PlusN + Into<u32>,
327{
328 pub fn new() -> Self {
329 Self {
330 vek: Vec::new(),
331 ty_ix: PhantomData::<TyIx>,
332 }
333 }
334 pub fn from_vec(vek: Vec<Ty>) -> Self {
335 Self {
336 vek,
337 ty_ix: PhantomData::<TyIx>,
338 }
339 }
340 pub fn append(&mut self, other: &mut TypedIxVec<TyIx, Ty>) {
341 // FIXME what if this overflows?
342 self.vek.append(&mut other.vek);
343 }
344 pub fn iter(&self) -> Iter<Ty> {
345 self.vek.iter()
346 }
347 pub fn iter_mut(&mut self) -> IterMut<Ty> {
348 self.vek.iter_mut()
349 }
350 pub fn len(&self) -> u32 {
351 // FIXME what if this overflows?
352 self.vek.len() as u32
353 }
354 pub fn push(&mut self, item: Ty) {
355 // FIXME what if this overflows?
356 self.vek.push(item);
357 }
358 pub fn resize(&mut self, new_len: u32, value: Ty) {
359 self.vek.resize(new_len as usize, value);
360 }
361 pub fn reserve(&mut self, additional: usize) {
362 self.vek.reserve(additional);
363 }
364 pub fn elems(&self) -> &[Ty] {
365 &self.vek[..]
366 }
367 pub fn elems_mut(&mut self) -> &mut [Ty] {
368 &mut self.vek[..]
369 }
370 pub fn range(&self) -> Range<TyIx> {
371 Range::new(TyIx::zero(), self.len() as usize)
372 }
373 pub fn remove(&mut self, idx: TyIx) -> Ty {
374 self.vek.remove(idx.into() as usize)
375 }
376 pub fn sort_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
377 self.vek.sort_by(compare)
378 }
379 pub fn sort_unstable_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
380 self.vek.sort_unstable_by(compare)
381 }
382 pub fn clear(&mut self) {
383 self.vek.clear();
384 }
385}
386
387impl<TyIx, Ty> Index<TyIx> for TypedIxVec<TyIx, Ty>
388where
389 TyIx: Into<u32>,
390{
391 type Output = Ty;
392 fn index(&self, ix: TyIx) -> &Ty {
393 &self.vek[ix.into() as usize]
394 }
395}
396
397impl<TyIx, Ty> IndexMut<TyIx> for TypedIxVec<TyIx, Ty>
398where
399 TyIx: Into<u32>,
400{
401 fn index_mut(&mut self, ix: TyIx) -> &mut Ty {
402 &mut self.vek[ix.into() as usize]
403 }
404}
405
406impl<TyIx, Ty> Clone for TypedIxVec<TyIx, Ty>
407where
408 Ty: Clone,
409{
410 // This is only needed for debug printing.
411 fn clone(&self) -> Self {
412 Self {
413 vek: self.vek.clone(),
414 ty_ix: PhantomData::<TyIx>,
415 }
416 }
417}
418
419//=============================================================================
420
421macro_rules! generate_boilerplate {
422 ($TypeIx:ident, $Type:ident, $PrintingPrefix:expr) => {
423 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
424 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
425 // Firstly, the indexing type (TypeIx)
426 pub enum $TypeIx {
427 $TypeIx(u32),
428 }
429 impl $TypeIx {
430 #[allow(dead_code)]
431 #[inline(always)]
432 pub fn new(n: u32) -> Self {
433 debug_assert!(n != u32::max_value());
434 Self::$TypeIx(n)
435 }
436 #[allow(dead_code)]
437 #[inline(always)]
438 pub const fn max_value() -> Self {
439 Self::$TypeIx(u32::max_value() - 1)
440 }
441 #[allow(dead_code)]
442 #[inline(always)]
443 pub const fn min_value() -> Self {
444 Self::$TypeIx(u32::min_value())
445 }
446 #[allow(dead_code)]
447 #[inline(always)]
448 pub const fn invalid_value() -> Self {
449 Self::$TypeIx(u32::max_value())
450 }
451 #[allow(dead_code)]
452 #[inline(always)]
453 pub fn is_valid(self) -> bool {
454 self != Self::invalid_value()
455 }
456 #[allow(dead_code)]
457 #[inline(always)]
458 pub fn is_invalid(self) -> bool {
459 self == Self::invalid_value()
460 }
461 #[allow(dead_code)]
462 #[inline(always)]
463 pub fn get(self) -> u32 {
464 debug_assert!(self.is_valid());
465 match self {
466 $TypeIx::$TypeIx(n) => n,
467 }
468 }
469 #[allow(dead_code)]
470 #[inline(always)]
471 pub fn plus(self, delta: u32) -> $TypeIx {
472 debug_assert!(self.is_valid());
473 $TypeIx::$TypeIx(self.get() + delta)
474 }
475 #[allow(dead_code)]
476 #[inline(always)]
477 pub fn minus(self, delta: u32) -> $TypeIx {
478 debug_assert!(self.is_valid());
479 $TypeIx::$TypeIx(self.get() - delta)
480 }
481 #[allow(dead_code)]
482 pub fn dotdot(&self, last_plus1: $TypeIx) -> Range<$TypeIx> {
483 debug_assert!(self.is_valid());
484 let len = (last_plus1.get() - self.get()) as usize;
485 Range::new(*self, len)
486 }
487 }
488 impl fmt::Debug for $TypeIx {
489 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
490 if self.is_invalid() {
491 write!(fmt, "{}<NONE>", $PrintingPrefix)
492 } else {
493 write!(fmt, "{}{}", $PrintingPrefix, &self.get())
494 }
495 }
496 }
497 impl PlusN for $TypeIx {
498 #[inline(always)]
499 fn plus_n(&self, n: usize) -> Self {
500 debug_assert!(self.is_valid());
501 self.plus(n as u32)
502 }
503 }
504 impl Into<u32> for $TypeIx {
505 #[inline(always)]
506 fn into(self) -> u32 {
507 debug_assert!(self.is_valid());
508 self.get()
509 }
510 }
511 impl Zero for $TypeIx {
512 #[inline(always)]
513 fn zero() -> Self {
514 $TypeIx::new(0)
515 }
516 }
517 };
518}
519
520generate_boilerplate!(InstIx, Inst, "i");
521
522generate_boilerplate!(BlockIx, Block, "b");
523
524generate_boilerplate!(RangeFragIx, RangeFrag, "f");
525
526generate_boilerplate!(VirtualRangeIx, VirtualRange, "vr");
527
528generate_boilerplate!(RealRangeIx, RealRange, "rr");
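// After expansion, each of these index types behaves like a thin u32 newtype.
// A small illustration of the generated API:
//
//    let i0 = InstIx::new(0);
//    let i4 = i0.plus(4);                       // InstIx 4
//    assert_eq!(format!("{:?}", i4), "i4");     // printed with the "i" prefix
//    for iix in i0.dotdot(i4) { /* visits i0, i1, i2, i3 */ }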
529
530impl<TyIx, Ty: fmt::Debug> fmt::Debug for TypedIxVec<TyIx, Ty> {
531 // This is something of a hack in the sense that it doesn't show the
532 // indices, but oh well ..
533 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
534 write!(fmt, "{:?}", self.vek)
535 }
536}
537
538//=============================================================================
539// Definitions of register classes, registers and stack slots, and printing
540// thereof. Note that this register class definition is meant to be
541// architecture-independent: it simply captures common integer/float/vector
542// types that machines are likely to use. TODO: investigate whether we need a
543// more flexible register-class definition mechanism.
544
545#[derive(PartialEq, Eq, Debug, Clone, Copy)]
546#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
547pub enum RegClass {
548 I32 = 0,
549 F32 = 1,
550 I64 = 2,
551 F64 = 3,
552 V128 = 4,
553 INVALID = 5,
554}
555
556/// The number of register classes that exist.
557/// N.B.: must be <= 7 (fit into 3 bits) for 32-bit VReg/RReg packed format!
558pub const NUM_REG_CLASSES: usize = 5;
559
560impl RegClass {
561 /// Convert a register class to a u32 index.
562 #[inline(always)]
563 pub const fn rc_to_u32(self) -> u32 {
564 self as u32
565 }
566 /// Convert a register class to a usize index.
567 #[inline(always)]
568 pub const fn rc_to_usize(self) -> usize {
569 self as usize
570 }
571 /// Construct a register class from a u32.
572 #[inline(always)]
573 pub fn rc_from_u32(rc: u32) -> RegClass {
574 match rc {
575 0 => RegClass::I32,
576 1 => RegClass::F32,
577 2 => RegClass::I64,
578 3 => RegClass::F64,
579 4 => RegClass::V128,
580 _ => panic!("RegClass::rc_from_u32"),
581 }
582 }
583
584 pub fn short_name(self) -> &'static str {
585 match self {
586 RegClass::I32 => "I",
587 RegClass::I64 => "J",
588 RegClass::F32 => "F",
589 RegClass::F64 => "D",
590 RegClass::V128 => "V",
591 RegClass::INVALID => panic!("RegClass::short_name"),
592 }
593 }
594
595 pub fn long_name(self) -> &'static str {
596 match self {
597 RegClass::I32 => "I32",
RegClass::I64 => "I64",
RegClass::F32 => "F32",
RegClass::F64 => "F64",
601 RegClass::V128 => "V128",
602 RegClass::INVALID => panic!("RegClass::long_name"),
603 }
604 }
605}
606
607// Reg represents both real and virtual registers. For compactness and speed,
608// these fields are packed into a single u32. The format is:
609//
610// Virtual Reg: 1 rc:3 index:28
611// Real Reg: 0 rc:3 uu:12 enc:8 index:8
612//
613// `rc` is the register class. `uu` means "unused". `enc` is the hardware
614// encoding for the reg. `index` is a zero based index which has the
615// following meanings:
616//
617// * for a Virtual Reg, `index` is just the virtual register number.
618// * for a Real Reg, `index` is the entry number in the associated
619// `RealRegUniverse`.
620//
621// This scheme gives us:
622//
623// * a compact (32-bit) representation for registers
624// * fast equality tests for registers
625// * ability to handle up to 2^28 (268.4 million) virtual regs per function
626// * ability to handle up to 8 register classes
627// * ability to handle targets with up to 256 real registers
628// * ability to emit instructions containing real regs without having to
629// look up encodings in any side tables, since a real reg carries its
630// encoding
631// * efficient bitsets and arrays of virtual registers, since each has a
632// zero-based index baked in
633// * efficient bitsets and arrays of real registers, for the same reason
634//
635// This scheme makes it impossible to represent overlapping register classes,
636// but that doesn't seem important. AFAIK only ARM32 VFP/Neon has that.
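// A worked example of the packing described above (the particular values are
// chosen only for illustration): a virtual reg of class F64 (rc = 3) with
// index 7 is encoded as
//
//    1 011 0000000000000000000000000111           = 0xB000_0007
//
// and a real reg of class I64 (rc = 2) with hardware encoding 13 and universe
// index 4 is encoded as
//
//    0 010 000000000000 00001101 00000100         = 0x2000_0D04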
637
638#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
639#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
640pub struct Reg {
641 bits: u32,
642}
643
644const INVALID_REG: u32 = 0xffffffff;
645
646impl Reg {
647 #[inline(always)]
648 pub fn is_virtual(self) -> bool {
649 self.is_valid() && (self.bits & 0x8000_0000) != 0
650 }
651 #[inline(always)]
652 pub fn is_real(self) -> bool {
653 self.is_valid() && (self.bits & 0x8000_0000) == 0
654 }
655 pub const fn new_real(rc: RegClass, enc: u8, index: u8) -> Self {
656 let n = (0 << 31) | (rc.rc_to_u32() << 28) | ((enc as u32) << 8) | ((index as u32) << 0);
657 Reg { bits: n }
658 }
659 pub fn new_virtual(rc: RegClass, index: u32) -> Self {
660 if index >= (1 << 28) {
661 panic!("new_virtual(): index too large");
662 }
663 let n = (1 << 31) | (rc.rc_to_u32() << 28) | (index << 0);
664 Reg { bits: n }
665 }
666 pub const fn invalid() -> Reg {
667 Reg { bits: INVALID_REG }
668 }
669 #[inline(always)]
670 pub fn is_invalid(self) -> bool {
671 self.bits == INVALID_REG
672 }
673 #[inline(always)]
674 pub fn is_valid(self) -> bool {
675 !self.is_invalid()
676 }
677 pub fn is_virtual_or_invalid(self) -> bool {
678 self.is_virtual() || self.is_invalid()
679 }
680 pub fn is_real_or_invalid(self) -> bool {
681 self.is_real() || self.is_invalid()
682 }
683 #[inline(always)]
684 pub fn get_class(self) -> RegClass {
685 debug_assert!(self.is_valid());
686 RegClass::rc_from_u32((self.bits >> 28) & 0x7)
687 }
688 #[inline(always)]
689 pub fn get_index(self) -> usize {
690 debug_assert!(self.is_valid());
691 // Return type is usize because typically we will want to use the
692 // result for indexing into a Vec
693 if self.is_virtual() {
694 (self.bits & ((1 << 28) - 1)) as usize
695 } else {
696 (self.bits & ((1 << 8) - 1)) as usize
697 }
698 }
699 #[inline(always)]
700 pub fn get_index_u32(self) -> u32 {
701 debug_assert!(self.is_valid());
702 if self.is_virtual() {
703 self.bits & ((1 << 28) - 1)
704 } else {
705 self.bits & ((1 << 8) - 1)
706 }
707 }
708 pub fn get_hw_encoding(self) -> u8 {
709 debug_assert!(self.is_valid());
710 if self.is_virtual() {
711 panic!("Virtual register does not have a hardware encoding")
712 } else {
713 ((self.bits >> 8) & ((1 << 8) - 1)) as u8
714 }
715 }
716 pub fn as_virtual_reg(self) -> Option<VirtualReg> {
717 // Allow invalid virtual regs as well.
718 if self.is_virtual_or_invalid() {
719 Some(VirtualReg { reg: self })
720 } else {
721 None
722 }
723 }
724 pub fn as_real_reg(self) -> Option<RealReg> {
725 // Allow invalid real regs as well.
726 if self.is_real_or_invalid() {
727 Some(RealReg { reg: self })
728 } else {
729 None
730 }
731 }
732 pub fn show_with_rru(self, univ: &RealRegUniverse) -> String {
733 if self.is_real() && self.get_index() < univ.regs.len() {
734 univ.regs[self.get_index()].1.clone()
735 } else if self.is_valid() {
736 format!("{:?}", self)
737 } else {
738 "rINVALID".to_string()
739 }
740 }
741}
742
743impl fmt::Debug for Reg {
744 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
745 if self.is_valid() {
746 write!(
747 fmt,
748 "{}{}{}",
749 if self.is_virtual() { "v" } else { "r" },
750 self.get_index(),
751 self.get_class().short_name(),
752 )
753 } else {
754 write!(fmt, "rINVALID")
755 }
756 }
757}
758
759// RealReg and VirtualReg are merely wrappers around Reg, which try to
760// dynamically ensure that they are really wrapping the correct flavour of
761// register.
762
763#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
764#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
765pub struct RealReg {
766 reg: Reg,
767}
768impl Reg /* !!not RealReg!! */ {
769 pub fn to_real_reg(self) -> RealReg {
770 if self.is_virtual() {
771 panic!("Reg::to_real_reg: this is a virtual register")
772 } else {
773 RealReg { reg: self }
774 }
775 }
776}
777impl RealReg {
778 pub fn get_class(self) -> RegClass {
779 self.reg.get_class()
780 }
781 #[inline(always)]
782 pub fn get_index(self) -> usize {
783 self.reg.get_index()
784 }
785 pub fn get_hw_encoding(self) -> usize {
786 self.reg.get_hw_encoding() as usize
787 }
788 #[inline(always)]
789 pub fn to_reg(self) -> Reg {
790 self.reg
791 }
792 pub fn invalid() -> RealReg {
793 RealReg {
794 reg: Reg::invalid(),
795 }
796 }
797 pub fn is_valid(self) -> bool {
798 self.reg.is_valid()
799 }
800 pub fn is_invalid(self) -> bool {
801 self.reg.is_invalid()
802 }
803 pub fn maybe_valid(self) -> Option<RealReg> {
804 if self == RealReg::invalid() {
805 None
806 } else {
807 Some(self)
808 }
809 }
810}
811impl fmt::Debug for RealReg {
812 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
813 write!(fmt, "{:?}", self.reg)
814 }
815}
816
817#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
818#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
819pub struct VirtualReg {
820 reg: Reg,
821}
822impl Reg /* !!not VirtualReg!! */ {
823 #[inline(always)]
824 pub fn to_virtual_reg(self) -> VirtualReg {
825 if self.is_virtual() {
826 VirtualReg { reg: self }
827 } else {
828 panic!("Reg::to_virtual_reg: this is a real register")
829 }
830 }
831}
832impl VirtualReg {
833 pub fn get_class(self) -> RegClass {
834 self.reg.get_class()
835 }
836 #[inline(always)]
837 pub fn get_index(self) -> usize {
838 self.reg.get_index()
839 }
840 #[inline(always)]
841 pub fn to_reg(self) -> Reg {
842 self.reg
843 }
844 pub fn invalid() -> VirtualReg {
845 VirtualReg {
846 reg: Reg::invalid(),
847 }
848 }
849 pub fn is_valid(self) -> bool {
850 self.reg.is_valid()
851 }
852 pub fn is_invalid(self) -> bool {
853 self.reg.is_invalid()
854 }
855 pub fn maybe_valid(self) -> Option<VirtualReg> {
856 if self == VirtualReg::invalid() {
857 None
858 } else {
859 Some(self)
860 }
861 }
862}
863impl fmt::Debug for VirtualReg {
864 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
865 write!(fmt, "{:?}", self.reg)
866 }
867}
868
869impl Reg {
870 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
871 /// a read-role.
872 pub fn apply_uses<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
873 self.apply(|vreg| mapper.get_use(vreg));
874 }
875
876 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
877 /// a write-role.
878 pub fn apply_defs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
879 self.apply(|vreg| mapper.get_def(vreg));
880 }
881
882 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
883 /// a modify-role.
884 pub fn apply_mods<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
885 self.apply(|vreg| mapper.get_mod(vreg));
886 }
887
888 fn apply<F: Fn(VirtualReg) -> Option<RealReg>>(&mut self, f: F) {
889 if let Some(vreg) = self.as_virtual_reg() {
890 if let Some(rreg) = f(vreg) {
891 debug_assert!(rreg.get_class() == vreg.get_class());
892 *self = rreg.to_reg();
893 } else {
894 panic!("Reg::apply: no mapping for {:?}", self);
895 }
896 }
897 }
898}
899
900/// A "writable register". This is a zero-cost wrapper that can be used to
901/// create a distinction, at the Rust type level, between a plain "register"
902/// and a "writable register".
903///
904/// Writable<..> can be used by the client to ensure that, internally, it only
905/// generates instructions that write to registers that should be written. The
/// `RegUsageCollector` below, through which every instruction reports its
/// register uses, requires a `Writable<Reg>` (not just `Reg`) for the
/// registers it records as defined and modified. While we cannot hide the constructor for `Writable<..>`
909/// from certain parts of the client while exposing it to others, the client
910/// *can* adopt conventions to e.g. only ever call the Writable<..>
911/// constructor from its central vreg-management logic, and decide that any
912/// invocation of this constructor in a machine backend (for example) is an
913/// error.
914#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
915#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
916pub struct Writable<R> {
917 reg: R,
918}
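// A sketch of the intended discipline: the central vreg-management logic is
// the only place that creates the wrapper ..
//
//    let w: Writable<Reg> = Writable::from_reg(Reg::new_virtual(RegClass::I64, 42));
//
// .. and everything downstream only ever extracts the plain, read-only view
// via `w.to_reg()`.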
919
920/// Set of requirements for types that can be wrapped in Writable.
921pub trait WritableBase:
922 Copy + Clone + PartialEq + Eq + Hash + PartialOrd + Ord + fmt::Debug
923{
924}
925
926impl<T> WritableBase for T where
927 T: Copy + Clone + PartialEq + Eq + Hash + PartialOrd + Ord + fmt::Debug
928{
929}
930
931impl<R> Writable<R> {
932 /// Create a Writable<R> from an R. The client should carefully audit where
933 /// it calls this constructor to ensure correctness (see `Writable<..>`
934 /// struct documentation).
935 #[inline(always)]
936 pub const fn from_reg(reg: R) -> Writable<R> {
937 Writable { reg }
938 }
939}
940
941impl<R: WritableBase> Writable<R> {
942 /// Get the inner Reg.
943 pub fn to_reg(&self) -> R {
944 self.reg
945 }
946
947 pub fn map<F, U>(&self, f: F) -> Writable<U>
948 where
949 F: Fn(R) -> U,
950 U: WritableBase,
951 {
952 Writable { reg: f(self.reg) }
953 }
954}
955
956#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
957pub struct SpillSlot(u32);
958
959impl SpillSlot {
960 #[inline(always)]
961 pub fn new(n: u32) -> Self {
962 Self(n)
963 }
964 #[inline(always)]
965 pub fn get(self) -> u32 {
966 self.0
967 }
968 #[inline(always)]
969 pub fn get_usize(self) -> usize {
970 self.get() as usize
971 }
972 pub fn round_up(self, num_slots: u32) -> SpillSlot {
973 assert!(num_slots > 0);
974 SpillSlot::new((self.get() + num_slots - 1) / num_slots * num_slots)
975 }
976 pub fn inc(self, num_slots: u32) -> SpillSlot {
977 SpillSlot::new(self.get() + num_slots)
978 }
979}
980
981impl fmt::Debug for SpillSlot {
982 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
983 write!(fmt, "S{}", self.get())
984 }
985}
986
987//=============================================================================
988// Register uses: low level interface
989
990// This minimal struct is visible from outside the regalloc.rs interface. It
991// is intended to be a safe wrapper around `RegVecs`, which isn't externally
992// visible. It is used to collect unsanitized reg use info from client
993// instructions.
994pub struct RegUsageCollector<'a> {
995 pub reg_vecs: &'a mut RegVecs,
996}
997
998impl<'a> RegUsageCollector<'a> {
999 pub fn new(reg_vecs: &'a mut RegVecs) -> Self {
1000 Self { reg_vecs }
1001 }
1002 pub fn add_use(&mut self, r: Reg) {
1003 self.reg_vecs.uses.push(r);
1004 }
1005 pub fn add_uses(&mut self, regs: &[Reg]) {
1006 self.reg_vecs.uses.extend(regs.iter());
1007 }
1008 pub fn add_def(&mut self, r: Writable<Reg>) {
1009 self.reg_vecs.defs.push(r.to_reg());
1010 }
1011 pub fn add_defs(&mut self, regs: &[Writable<Reg>]) {
1012 self.reg_vecs.defs.reserve(regs.len());
1013 for r in regs {
1014 self.reg_vecs.defs.push(r.to_reg());
1015 }
1016 }
1017 pub fn add_mod(&mut self, r: Writable<Reg>) {
1018 self.reg_vecs.mods.push(r.to_reg());
1019 }
1020 pub fn add_mods(&mut self, regs: &[Writable<Reg>]) {
1021 self.reg_vecs.mods.reserve(regs.len());
1022 for r in regs {
1023 self.reg_vecs.mods.push(r.to_reg());
1024 }
1025 }
1026
1027 // The presence of the following two is a hack, needed to support fuzzing
1028 // in the test framework. Real clients should not call them.
1029 pub fn get_use_def_mod_vecs_test_framework_only(&self) -> (Vec<Reg>, Vec<Reg>, Vec<Reg>) {
1030 (
1031 self.reg_vecs.uses.clone(),
1032 self.reg_vecs.defs.clone(),
1033 self.reg_vecs.mods.clone(),
1034 )
1035 }
1036
1037 pub fn get_empty_reg_vecs_test_framework_only(sanitized: bool) -> RegVecs {
1038 RegVecs::new(sanitized)
1039 }
1040}
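// A short sketch of how a client's reg-use reporting might drive the
// collector (`v1`, `v2` and `v3` stand for whatever registers the
// instruction mentions):
//
//    collector.add_use(v1);                        // v1 is read
//    collector.add_def(Writable::from_reg(v2));    // v2 is written
//    collector.add_mod(Writable::from_reg(v3));    // v3 is read and written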
1041
1042// Everything else is not visible outside the regalloc.rs interface.
1043
1044// There is one of these per function. Note that `defs` and `mods` lose the
1045// `Writable` constraint at this point. This is for convenience of having all
// three vectors be the same type, but at the cost of no longer being able to
// distinguish read-only from read/write registers in the Rust type system.
1049#[derive(Debug)]
1050pub struct RegVecs {
1051 pub uses: Vec<Reg>,
1052 pub defs: Vec<Reg>,
1053 pub mods: Vec<Reg>,
1054 sanitized: bool,
1055}
1056
1057impl RegVecs {
1058 pub fn new(sanitized: bool) -> Self {
1059 Self {
1060 uses: vec![],
1061 defs: vec![],
1062 mods: vec![],
1063 sanitized,
1064 }
1065 }
1066 pub fn is_sanitized(&self) -> bool {
1067 self.sanitized
1068 }
1069 pub fn set_sanitized(&mut self, sanitized: bool) {
1070 self.sanitized = sanitized;
1071 }
1072 pub fn clear(&mut self) {
1073 self.uses.clear();
1074 self.defs.clear();
1075 self.mods.clear();
1076 }
1077}
1078
1079// There is one of these per insn, so try and keep it as compact as possible.
1080// I think this should fit in 16 bytes.
1081#[derive(Clone, Debug)]
1082pub struct RegVecBounds {
1083 // These are the group start indices in RegVecs.{uses, defs, mods}.
1084 pub uses_start: u32,
1085 pub defs_start: u32,
1086 pub mods_start: u32,
// And these are the group lengths. Storing them as `u8` limits each
// instruction to mentioning at most 255 registers in any group, but that
// does not seem like a problem.
1090 pub uses_len: u8,
1091 pub defs_len: u8,
1092 pub mods_len: u8,
1093}
1094
1095impl RegVecBounds {
1096 pub fn new() -> Self {
1097 Self {
1098 uses_start: 0,
1099 defs_start: 0,
1100 mods_start: 0,
1101 uses_len: 0,
1102 defs_len: 0,
1103 mods_len: 0,
1104 }
1105 }
1106}
1107
1108// This is the primary structure. We compute just one of these for an entire
1109// function.
1110pub struct RegVecsAndBounds {
1111 // The three vectors of registers. These can be arbitrarily long.
1112 pub vecs: RegVecs,
1113 // Admin info which tells us the location, for each insn, of its register
1114 // groups in `vecs`.
1115 pub bounds: TypedIxVec<InstIx, RegVecBounds>,
1116}
1117
1118impl RegVecsAndBounds {
1119 pub fn new(vecs: RegVecs, bounds: TypedIxVec<InstIx, RegVecBounds>) -> Self {
1120 Self { vecs, bounds }
1121 }
1122 pub fn is_sanitized(&self) -> bool {
1123 self.vecs.sanitized
1124 }
1125 #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is currently unused.
1126 pub fn num_insns(&self) -> u32 {
1127 self.bounds.len()
1128 }
1129}
1130
1131//=============================================================================
1132// Register uses: convenience interface
1133
1134// Some call sites want to get reg use information as three Sets. This is a
1135// "convenience facility" which is easier to use but much slower than working
1136// with a whole-function `RegVecsAndBounds`. It shouldn't be used on critical
1137// paths.
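// A sketch of typical use, where `rvb` is a whole-function `RegVecsAndBounds`:
//
//    let sets: RegSets = rvb.get_reg_sets_for_iix(InstIx::new(3));
//    for r in sets.uses.iter() { /* each Reg read by instruction 3 */ }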
1138#[derive(Debug)]
1139pub struct RegSets {
1140 pub uses: Set<Reg>, // registers that are read.
1141 pub defs: Set<Reg>, // registers that are written.
1142 pub mods: Set<Reg>, // registers that are modified.
1143 sanitized: bool,
1144}
1145
1146impl RegSets {
1147 pub fn new(sanitized: bool) -> Self {
1148 Self {
1149 uses: Set::<Reg>::empty(),
1150 defs: Set::<Reg>::empty(),
1151 mods: Set::<Reg>::empty(),
1152 sanitized,
1153 }
1154 }
1155
1156 pub fn is_sanitized(&self) -> bool {
1157 self.sanitized
1158 }
1159}
1160
1161impl RegVecsAndBounds {
1162 /* !!not RegSets!! */
1163 #[inline(never)]
1164 // Convenience function. Try to avoid using this.
1165 pub fn get_reg_sets_for_iix(&self, iix: InstIx) -> RegSets {
1166 let bounds = &self.bounds[iix];
1167 let mut regsets = RegSets::new(self.vecs.sanitized);
1168 for i in bounds.uses_start as usize..bounds.uses_start as usize + bounds.uses_len as usize {
1169 regsets.uses.insert(self.vecs.uses[i]);
1170 }
1171 for i in bounds.defs_start as usize..bounds.defs_start as usize + bounds.defs_len as usize {
1172 regsets.defs.insert(self.vecs.defs[i]);
1173 }
1174 for i in bounds.mods_start as usize..bounds.mods_start as usize + bounds.mods_len as usize {
1175 regsets.mods.insert(self.vecs.mods[i]);
1176 }
1177 regsets
1178 }
1179}
1180
1181//=============================================================================
1182// Definitions of the "real register universe".
1183
1184// A "Real Register Universe" is a read-only structure that contains all
1185// information about real registers on a given host. It serves several
1186// purposes:
1187//
1188// * defines the mapping from real register indices to the registers
1189// themselves
1190//
1191// * defines the size of the initial section of that mapping that is available
1192// to the register allocator for use, so that it can treat the registers
1193// under its control as a zero based, contiguous array. This is important
1194// for its efficiency.
1195//
1196// * gives meaning to Set<RealReg>, which otherwise would merely be a bunch of
1197// bits.
1198
1199#[derive(Clone, Debug)]
1200#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
1201pub struct RealRegUniverse {
1202 // The registers themselves. All must be real registers, and all must
1203 // have their index number (.get_index()) equal to the array index here,
1204 // since this is the only place where we map index numbers to actual
1205 // registers.
1206 pub regs: Vec<(RealReg, String)>,
1207
1208 // This is the size of the initial section of `regs` that is available to
1209 // the allocator. It must be <= `regs`.len().
1210 pub allocable: usize,
1211
1212 // Information about groups of allocable registers. Used to quickly address
1213 // only a group of allocable registers belonging to the same register class.
1214 // Indexes into `allocable_by_class` are RegClass values, such as
1215 // RegClass::F32. If the resulting entry is `None` then there are no
1216 // registers in that class. Otherwise the value is a `RegClassInfo`, which
1217 // provides a register range and possibly information about fixed uses.
1218 pub allocable_by_class: [Option<RegClassInfo>; NUM_REG_CLASSES],
1219}
1220
1221/// Information about a single register class in the `RealRegUniverse`.
1222#[derive(Clone, Copy, Debug)]
1223#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
1224pub struct RegClassInfo {
1225 // Range of allocatable registers in this register class, in terms of
1226 // register indices.
1227 //
1228 // A range (first, last) specifies the range of entries in
1229 // `RealRegUniverse.regs` corresponding to that class. The range includes
1230 // both `first` and `last`.
1231 //
1232 // In all cases, `last` must be < `RealRegUniverse.allocable`. In other
1233 // words, all ranges together in `allocable_by_class` must describe only the
1234 // allocable prefix of `regs`.
1235 //
1236 // For example, let's say
1237 // allocable_by_class[RegClass::F32] ==
1238 // Some(RegClassInfo { first: 10, last: 14, .. })
1239 // Then regs[10], regs[11], regs[12], regs[13], and regs[14] give all
1240 // registers of register class RegClass::F32.
1241 //
1242 // The effect of the above is that registers in `regs` must form
1243 // contiguous groups. This is checked by RealRegUniverse::check_is_sane().
1244 pub first: usize,
1245 pub last: usize,
1246
1247 // A register, if any, that is *guaranteed* not to be used as a fixed use
1248 // in any code, and so that the register allocator can statically reserve
1249 // for its own use as a temporary. Some register allocators may need such
1250 // a register for various maneuvers, for example a spillslot-to-spillslot
1251 // move when no (other) registers are free.
1252 pub suggested_scratch: Option<usize>,
1253}
1254
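// A minimal sketch of building and checking a universe with two allocable I64
// registers (the names and encodings here are purely illustrative):
//
//    let r0 = Reg::new_real(RegClass::I64, /*enc=*/0, /*index=*/0).to_real_reg();
//    let r1 = Reg::new_real(RegClass::I64, /*enc=*/1, /*index=*/1).to_real_reg();
//    let mut allocable_by_class = [None; NUM_REG_CLASSES];
//    allocable_by_class[RegClass::I64.rc_to_usize()] =
//        Some(RegClassInfo { first: 0, last: 1, suggested_scratch: None });
//    let univ = RealRegUniverse {
//        regs: vec![(r0, "r0".to_string()), (r1, "r1".to_string())],
//        allocable: 2,
//        allocable_by_class,
//    };
//    univ.check_is_sane();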
1255impl RealRegUniverse {
1256 /// Show it in a pretty way.
1257 pub fn show(&self) -> Vec<String> {
1258 let mut res = vec![];
1259 // Show the allocables
1260 for class_num in 0..NUM_REG_CLASSES {
1261 let class_info = match &self.allocable_by_class[class_num] {
1262 None => continue,
1263 Some(info) => info,
1264 };
1265 let class = RegClass::rc_from_u32(class_num as u32);
1266 let mut class_str = "class ".to_string()
1267 + &class.long_name().to_string()
1268 + &"(".to_string()
1269 + &class.short_name().to_string()
1270 + &") at ".to_string();
1271 class_str = class_str + &format!("[{} .. {}]: ", class_info.first, class_info.last);
1272 for ix in class_info.first..=class_info.last {
1273 class_str = class_str + &self.regs[ix].1;
1274 if let Some(suggested_ix) = class_info.suggested_scratch {
1275 if ix == suggested_ix {
1276 class_str = class_str + "*";
1277 }
1278 }
1279 class_str = class_str + " ";
1280 }
1281 res.push(class_str);
1282 }
1283 // And the non-allocables
1284 if self.allocable < self.regs.len() {
1285 let mut stragglers = format!(
1286 "not allocable at [{} .. {}]: ",
1287 self.allocable,
1288 self.regs.len() - 1
1289 );
1290 for ix in self.allocable..self.regs.len() {
1291 stragglers = stragglers + &self.regs[ix].1 + &" ".to_string();
1292 }
1293 res.push(stragglers);
1294 }
1295 res
1296 }
1297
1298 /// Check that the given universe satisfies various invariants, and panic
1299 /// if not. All the invariants are important.
1300 pub fn check_is_sane(&self) {
1301 let regs_len = self.regs.len();
1302 let regs_allocable = self.allocable;
1303 // The universe must contain at most 256 registers. That's because
1304 // `Reg` only has an 8-bit index value field, so if the universe
1305 // contained more than 256 registers, we'd never be able to index into
1306 // entries 256 and above. This is no limitation in practice since all
1307 // targets we're interested in contain (many) fewer than 256 regs in
1308 // total.
1309 let mut ok = regs_len <= 256;
1310 // The number of allocable registers must not exceed the number of
1311 // `regs` presented. In general it will be less, since the universe
1312 // will list some registers (stack pointer, etc) which are not
1313 // available for allocation.
1314 if ok {
1315 ok = regs_allocable <= regs_len;
1316 }
1317 // All registers must have an index value which points back at the
1318 // `regs` slot they are in. Also they really must be real regs.
1319 if ok {
1320 for i in 0..regs_len {
1321 let (reg, _name) = &self.regs[i];
1322 if ok && (reg.to_reg().is_virtual() || reg.get_index() != i) {
1323 ok = false;
1324 }
1325 }
1326 }
// The allocatable regclass groupings defined by `allocable_by_class` must
// be contiguous.
1329 if ok {
1330 let mut regclass_used = [false; NUM_REG_CLASSES];
1331 for rc in 0..NUM_REG_CLASSES {
1332 regclass_used[rc] = false;
1333 }
1334 for i in 0..regs_allocable {
1335 let (reg, _name) = &self.regs[i];
1336 let rc = reg.get_class().rc_to_u32() as usize;
1337 regclass_used[rc] = true;
1338 }
1339 // Scan forward through each grouping, checking that the listed
1340 // registers really are of the claimed class. Also count the
1341 // total number visited. This seems a fairly reliable way to
1342 // ensure that the groupings cover all allocated registers exactly
1343 // once, and that all classes are contiguous groups.
1344 let mut regs_visited = 0;
1345 for rc in 0..NUM_REG_CLASSES {
1346 match &self.allocable_by_class[rc] {
1347 &None => {
1348 if regclass_used[rc] {
1349 ok = false;
1350 }
1351 }
1352 &Some(RegClassInfo {
1353 first,
1354 last,
1355 suggested_scratch,
1356 }) => {
1357 if !regclass_used[rc] {
1358 ok = false;
1359 }
1360 if ok {
1361 for i in first..last + 1 {
1362 let (reg, _name) = &self.regs[i];
1363 if ok && RegClass::rc_from_u32(rc as u32) != reg.get_class() {
1364 ok = false;
1365 }
1366 regs_visited += 1;
1367 }
1368 }
1369 if ok {
1370 if let Some(s) = suggested_scratch {
1371 if s < first || s > last {
1372 ok = false;
1373 }
1374 }
1375 }
1376 }
1377 }
1378 }
1379 if ok && regs_visited != regs_allocable {
1380 ok = false;
1381 }
1382 }
1383 // So finally ..
1384 if !ok {
1385 panic!("RealRegUniverse::check_is_sane: invalid RealRegUniverse");
1386 }
1387 }
1388}
1389
1390//=============================================================================
1391// Representing and printing of live range fragments.
1392
1393#[derive(Copy, Clone, Hash, PartialEq, Eq, Ord)]
1394// There are four "points" within an instruction that are of interest, and
1395// these have a total ordering: R < U < D < S. They are:
1396//
1397// * R(eload): this is where any reload insns for the insn itself are
1398// considered to live.
1399//
1400// * U(se): this is where the insn is considered to use values from those of
1401// its register operands that appear in a Read or Modify role.
1402//
1403// * D(ef): this is where the insn is considered to define new values for
1404// those of its register operands that appear in a Write or Modify role.
1405//
1406// * S(pill): this is where any spill insns for the insn itself are considered
1407// to live.
1408//
1409// Instructions in the incoming Func may only exist at the U and D points,
1410// and so their associated live range fragments will only mention the U and D
1411// points. However, when adding spill code, we need a way to represent live
1412// ranges involving the added spill and reload insns, in which case R and S
1413// come into play:
1414//
1415// * A reload for instruction i is considered to be live from i.R to i.U.
1416//
1417// * A spill for instruction i is considered to be live from i.D to i.S.
1418
1419pub enum Point {
1420 // The values here are important. Don't change them.
1421 Reload = 0,
1422 Use = 1,
1423 Def = 2,
1424 Spill = 3,
1425}
1426
1427impl Point {
1428 #[inline(always)]
1429 pub fn is_reload(self) -> bool {
1430 match self {
1431 Point::Reload => true,
1432 _ => false,
1433 }
1434 }
1435 #[inline(always)]
1436 pub fn is_use(self) -> bool {
1437 match self {
1438 Point::Use => true,
1439 _ => false,
1440 }
1441 }
1442 #[inline(always)]
1443 pub fn is_def(self) -> bool {
1444 match self {
1445 Point::Def => true,
1446 _ => false,
1447 }
1448 }
1449 #[inline(always)]
1450 pub fn is_spill(self) -> bool {
1451 match self {
1452 Point::Spill => true,
1453 _ => false,
1454 }
1455 }
1456 #[inline(always)]
1457 pub fn is_use_or_def(self) -> bool {
1458 self.is_use() || self.is_def()
1459 }
1460}
1461
1462impl PartialOrd for Point {
1463 // In short .. R < U < D < S. This is probably what would be #derive'd
1464 // anyway, but we need to be sure.
1465 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1466 (*self as u32).partial_cmp(&(*other as u32))
1467 }
1468}
1469
1470// See comments below on `RangeFrag` for the meaning of `InstPoint`.
1471#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1472pub struct InstPoint {
1473 /// This is conceptually:
1474 /// pub iix: InstIx,
1475 /// pub pt: Point,
1476 ///
1477 /// but packed into a single 32 bit word, so as
1478 /// (1) to ensure it is only 32 bits (and hence to guarantee that `RangeFrag`
1479 /// is 64 bits), and
1480 /// (2) to make it possible to implement `PartialOrd` using `PartialOrd`
1481 /// directly on 32 bit words (and hence we let it be derived).
1482 ///
1483 /// This has the format:
1484 /// InstIx as bits 31:2, Point as bits 1:0.
1485 ///
1486 /// It does give the slight limitation that all InstIxs must be < 2^30, but
1487 /// that's hardly a big deal: the analysis module rejects any input with 2^24
1488 /// or more Insns.
1489 ///
1490 /// Do not access this directly:
1491 bits: u32,
1492}
1493
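// A worked example of the packing above: InstPoint::new(InstIx::new(10), Point::Def)
// is stored as (10 << 2) | 2 == 42, while InstPoint::new_use(InstIx::new(10)) is
// (10 << 2) | 1 == 41, so the derived ordering correctly places the Use point of an
// instruction just before its Def point.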
1494impl InstPoint {
1495 #[inline(always)]
1496 pub fn new(iix: InstIx, pt: Point) -> Self {
1497 let iix_n = iix.get();
1498 assert!(iix_n < 0x4000_0000u32);
1499 let pt_n = pt as u32;
1500 InstPoint {
1501 bits: (iix_n << 2) | pt_n,
1502 }
1503 }
1504 #[inline(always)]
1505 pub fn iix(self) -> InstIx {
1506 InstIx::new(self.bits >> 2)
1507 }
1508 #[inline(always)]
1509 pub fn pt(self) -> Point {
1510 match self.bits & 3 {
1511 0 => Point::Reload,
1512 1 => Point::Use,
1513 2 => Point::Def,
1514 3 => Point::Spill,
1515 // This can never happen, but rustc doesn't seem to know that.
1516 _ => panic!("InstPt::pt: unreachable case"),
1517 }
1518 }
1519 #[inline(always)]
1520 pub fn set_iix(&mut self, iix: InstIx) {
1521 let iix_n = iix.get();
1522 assert!(iix_n < 0x4000_0000u32);
1523 self.bits = (iix_n << 2) | (self.bits & 3);
1524 }
1525 #[inline(always)]
1526 pub fn set_pt(&mut self, pt: Point) {
1527 self.bits = (self.bits & 0xFFFF_FFFCu32) | pt as u32;
1528 }
1529 #[inline(always)]
1530 pub fn new_reload(iix: InstIx) -> Self {
1531 InstPoint::new(iix, Point::Reload)
1532 }
1533 #[inline(always)]
1534 pub fn new_use(iix: InstIx) -> Self {
1535 InstPoint::new(iix, Point::Use)
1536 }
1537 #[inline(always)]
1538 pub fn new_def(iix: InstIx) -> Self {
1539 InstPoint::new(iix, Point::Def)
1540 }
1541 #[inline(always)]
1542 pub fn new_spill(iix: InstIx) -> Self {
1543 InstPoint::new(iix, Point::Spill)
1544 }
1545 #[inline(always)]
1546 pub fn invalid_value() -> Self {
1547 Self {
1548 bits: 0xFFFF_FFFFu32,
1549 }
1550 }
1551 #[inline(always)]
1552 pub fn max_value() -> Self {
1553 Self {
1554 bits: 0xFFFF_FFFFu32,
1555 }
1556 }
1557 #[inline(always)]
1558 pub fn min_value() -> Self {
1559 Self { bits: 0u32 }
1560 }
1561}
1562
1563impl fmt::Debug for InstPoint {
1564 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1565 write!(
1566 fmt,
1567 "{:?}{}",
1568 self.iix(),
1569 match self.pt() {
1570 Point::Reload => ".r",
1571 Point::Use => ".u",
1572 Point::Def => ".d",
1573 Point::Spill => ".s",
1574 }
1575 )
1576 }
1577}
1578
1579//=============================================================================
1580// Live Range Fragments, and their metrics
1581
1582// A Live Range Fragment (RangeFrag) describes a consecutive sequence of one or
1583// more instructions, in which a Reg is "live". The sequence must exist
1584// entirely inside only one basic block.
1585//
1586// However, merely indicating the start and end instruction numbers isn't
1587// enough: we must also include a "Use or Def" indication. These indicate two
1588// different "points" within each instruction: the Use position, where
1589// incoming registers are read, and the Def position, where outgoing registers
1590// are written. The Use position is considered to come before the Def
1591// position, as described for `Point` above.
1592//
1593// When we come to generate spill/restore live ranges, Point::S and Point::R
1594// also come into play. Live ranges (and hence, RangeFrags) that do not perform
1595// spills or restores should not use either of Point::S or Point::R.
1596//
1597// The set of positions denoted by
1598//
1599// {0 .. #insns-1} x {Reload point, Use point, Def point, Spill point}
1600//
1601// is exactly the set of positions that we need to keep track of when mapping
// live ranges to registers. This is the reason for the type InstPoint. Note
1603// that InstPoint values have a total ordering, at least within a single basic
1604// block: the insn number is used as the primary key, and the Point part is
1605// the secondary key, with Reload < Use < Def < Spill.
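// For example, a reload generated for instruction i10 gives rise to a frag
// covering [i10.r .. i10.u], which (assuming `iix10` names that instruction's
// InstIx) could be built as:
//
//    let frag = RangeFrag::new(InstPoint::new_reload(iix10),
//                              InstPoint::new_use(iix10));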
1606#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1607pub struct RangeFrag {
1608 pub first: InstPoint,
1609 pub last: InstPoint,
1610}
1611
1612impl fmt::Debug for RangeFrag {
1613 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1614 write!(fmt, "(RF: {:?}-{:?})", self.first, self.last)
1615 }
1616}
1617
1618impl RangeFrag {
1619 #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is unused.
1620 pub fn new(first: InstPoint, last: InstPoint) -> Self {
1621 debug_assert!(first <= last);
1622 RangeFrag { first, last }
1623 }
1624
1625 pub fn invalid_value() -> Self {
1626 Self {
1627 first: InstPoint::invalid_value(),
1628 last: InstPoint::invalid_value(),
1629 }
1630 }
1631
1632 pub fn new_with_metrics<F: Function>(
1633 f: &F,
1634 bix: BlockIx,
1635 first: InstPoint,
1636 last: InstPoint,
1637 count: u16,
1638 ) -> (Self, RangeFragMetrics) {
1639 debug_assert!(f.block_insns(bix).len() >= 1);
1640 debug_assert!(f.block_insns(bix).contains(first.iix()));
1641 debug_assert!(f.block_insns(bix).contains(last.iix()));
1642 debug_assert!(first <= last);
1643 if first == last {
1644 debug_assert!(count == 1);
1645 }
1646 let first_iix_in_block = f.block_insns(bix).first();
1647 let last_iix_in_block = f.block_insns(bix).last();
1648 let first_pt_in_block = InstPoint::new_use(first_iix_in_block);
1649 let last_pt_in_block = InstPoint::new_def(last_iix_in_block);
1650 let kind = match (first == first_pt_in_block, last == last_pt_in_block) {
1651 (false, false) => RangeFragKind::Local,
1652 (false, true) => RangeFragKind::LiveOut,
1653 (true, false) => RangeFragKind::LiveIn,
1654 (true, true) => RangeFragKind::Thru,
1655 };
1656 (
1657 RangeFrag { first, last },
1658 RangeFragMetrics { bix, kind, count },
1659 )
1660 }
1661}
1662
1663// Comparison of RangeFrags. They form a partial order.
1664
1665pub fn cmp_range_frags(f1: &RangeFrag, f2: &RangeFrag) -> Option<Ordering> {
1666 if f1.last < f2.first {
1667 return Some(Ordering::Less);
1668 }
1669 if f1.first > f2.last {
1670 return Some(Ordering::Greater);
1671 }
1672 if f1.first == f2.first && f1.last == f2.last {
1673 return Some(Ordering::Equal);
1674 }
1675 None
1676}
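// For example, a frag covering [i2.u .. i3.d] compares Less than one covering
// [i5.u .. i6.d] and Greater than one covering [i0.u .. i1.d], while two
// overlapping frags such as [i2.u .. i5.d] and [i4.u .. i6.d] compare as None.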
1677
1678impl RangeFrag {
1679 pub fn contains(&self, ipt: &InstPoint) -> bool {
1680 self.first <= *ipt && *ipt <= self.last
1681 }
1682}
1683
1684// A handy summary hint for a RangeFrag. Note that none of these are correct
1685// if the RangeFrag has been extended so as to cover multiple basic blocks.
1686// But that ("RangeFrag compression") is something done locally within each
1687// algorithm (BT and LSRA). The analysis-phase output will not include any
1688// such compressed RangeFrags.
1689#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1690pub enum RangeFragKind {
1691 Local, // Fragment exists entirely inside one block
1692 LiveIn, // Fragment is live in to a block, but ends inside it
1693 LiveOut, // Fragment is live out of a block, but starts inside it
1694 Thru, // Fragment is live through the block (starts and ends outside it)
1695}
1696
1697impl fmt::Debug for RangeFragKind {
1698 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1699 match self {
1700 RangeFragKind::Local => write!(fmt, "Local"),
1701 RangeFragKind::LiveIn => write!(fmt, "LiveIn"),
1702 RangeFragKind::LiveOut => write!(fmt, "LiveOut"),
1703 RangeFragKind::Thru => write!(fmt, "Thru"),
1704 }
1705 }
1706}
1707
1708// `RangeFrags` resulting from the initial analysis phase (analysis_data_flow.rs)
1709// exist only within single basic blocks, and therefore have some associated
1710// metrics, held by `RangeFragMetrics`:
1711//
1712// * a `count` field, which is a u16 indicating how often the associated storage
1713// unit (Reg) is mentioned inside the RangeFrag. It is assumed that the RangeFrag
1714// is associated with some Reg. If not, the `count` field is meaningless. This
1715// field has no effect on the correctness of the resulting allocation. It is used
1716// however in the estimation of `VirtualRange` spill costs, which are important
1717// for prioritising which `VirtualRange`s get assigned a register vs which have
1718// to be spilled.
1719//
1720// * `bix` field, which indicates which `Block` the fragment exists in. This
1721// field is actually redundant, since the containing `Block` can be inferred,
// laboriously, from the associated `RangeFrag`'s `first` and `last` fields,
1723// providing you have an `InstIxToBlockIx` mapping table to hand. It is included
1724// here for convenience.
1725//
1726// * `kind` is another convenience field, indicating how the range is included
1727// within its owning block.
1728//
1729// The analysis phase (fn `deref_and_compress_sorted_range_frag_ixs`)
1730// compresses ranges and as a result breaks the invariant that a `RangeFrag`
1731// exists only within a single `Block`. For a `RangeFrag` spanning multiple
1732// `Block`s, all three `RangeFragMetric` fields are meaningless. This is the
1733// reason for separating `RangeFrag` and `RangeFragMetrics` -- so that it is
1734// possible to merge `RangeFrag`s without being forced to create fake values
1735// for the metrics fields.
1736#[derive(Clone, PartialEq)]
1737pub struct RangeFragMetrics {
1738 pub bix: BlockIx,
1739 pub kind: RangeFragKind,
1740 pub count: u16,
1741}
1742
1743impl fmt::Debug for RangeFragMetrics {
1744 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1745 write!(
1746 fmt,
1747 "(RFM: {:?}, count={}, {:?})",
1748 self.kind, self.count, self.bix
1749 )
1750 }
1751}
1752
1753//=============================================================================
1754// Vectors of RangeFragIxs, sorted so that the associated RangeFrags are in
1755// ascending order, per their InstPoint fields. The associated RangeFrags may
1756// not overlap.
1757//
1758// The "fragment environment" (usually called "frag_env"), to which the
1759// RangeFragIxs refer, is not stored here.
1760
1761#[derive(Clone)]
1762pub(crate) struct SortedRangeFragIxs(SmallVec<[RangeFragIx; 4]>);
1763
1764impl fmt::Debug for SortedRangeFragIxs {
1765 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1766 self.0.fmt(fmt)
1767 }
1768}
1769
1770impl Deref for SortedRangeFragIxs {
1771 type Target = SmallVec<[RangeFragIx; 4]>;
1772 #[inline(always)]
1773 fn deref(&self) -> &Self::Target {
1774 &self.0
1775 }
1776}
1777
1778impl SortedRangeFragIxs {
1779 fn check(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
1780 for i in 1..self.0.len() {
1781 let prev_frag = &fenv[self.0[i - 1]];
1782 let this_frag = &fenv[self.0[i]];
1783 if cmp_range_frags(prev_frag, this_frag) != Some(Ordering::Less) {
1784 panic!("SortedRangeFragIxs::check: vector not ok");
1785 }
1786 }
1787 }
1788
1789 fn sort(&mut self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
1790 self.0.sort_unstable_by(|fix_a, fix_b| {
1791 match cmp_range_frags(&fenv[*fix_a], &fenv[*fix_b]) {
1792 Some(Ordering::Less) => Ordering::Less,
1793 Some(Ordering::Greater) => Ordering::Greater,
1794 Some(Ordering::Equal) | None => {
1795 panic!("SortedRangeFragIxs::sort: overlapping Frags!")
1796 }
1797 }
1798 });
1799 }
1800
1801 pub(crate) fn new(
1802 frag_ixs: SmallVec<[RangeFragIx; 4]>,
1803 fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
1804 ) -> Self {
1805 let mut res = Self(frag_ixs);
// Sort the fragment indices, then check they are in order and non-overlapping.
1807 res.sort(fenv);
1808 res.check(fenv);
1809 res
1810 }
1811
1812 pub(crate) fn unit(fix: RangeFragIx, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) -> Self {
1813 let mut res = Self(SmallVec::<[RangeFragIx; 4]>::new());
1814 res.0.push(fix);
1815 res.check(fenv);
1816 res
1817 }
1818
1819 /// Does this sorted list of range fragments contain the given instruction point?
1820 pub(crate) fn contains_pt(
1821 &self,
1822 fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
1823 pt: InstPoint,
1824 ) -> bool {
1825 self.0
1826 .binary_search_by(|&ix| {
1827 let frag = &fenv[ix];
1828 if pt < frag.first {
1829 Ordering::Greater
1830 } else if pt >= frag.first && pt <= frag.last {
1831 Ordering::Equal
1832 } else {
1833 Ordering::Less
1834 }
1835 })
1836 .is_ok()
1837 }
1838}
1839
1840/// Vectors of RangeFrags, sorted so that they are in ascending order, per
1841/// their InstPoint fields. The RangeFrags may not overlap.
1842#[derive(Clone)]
1843pub(crate) struct SortedRangeFrags(SmallVec<[RangeFrag; 4]>);
1844
1845impl fmt::Debug for SortedRangeFrags {
1846 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1847 self.0.fmt(fmt)
1848 }
1849}
1850
1851impl Deref for SortedRangeFrags {
1852 type Target = SmallVec<[RangeFrag; 4]>;
1853 #[inline(always)]
1854 fn deref(&self) -> &Self::Target {
1855 &self.0
1856 }
1857}
1858
1859impl DerefMut for SortedRangeFrags {
1860 #[inline(always)]
1861 fn deref_mut(&mut self) -> &mut Self::Target {
1862 &mut self.0
1863 }
1864}
1865
impl SortedRangeFrags {
    pub(crate) fn unit(frag: RangeFrag) -> Self {
        let mut res = Self(SmallVec::<[RangeFrag; 4]>::new());
        res.0.push(frag);
        res
    }

    pub(crate) fn empty() -> Self {
        Self(SmallVec::<[RangeFrag; 4]>::new())
    }

    pub(crate) fn overlaps(&self, other: &Self) -> bool {
        // Since both vectors are sorted and individually non-overlapping, we can establish that
        // they are mutually non-overlapping by walking them simultaneously and checking, at each
        // step, that there is a unique "next lowest" frag available.
        let frags1 = &self.0;
        let frags2 = &other.0;
        let n1 = frags1.len();
        let n2 = frags2.len();
        let mut c1 = 0;
        let mut c2 = 0;
        loop {
            if c1 >= n1 || c2 >= n2 {
                // We made it to the end of one (or both) vectors without finding any conflicts: no
                // overlap.
                return false;
            }
            let f1 = &frags1[c1];
            let f2 = &frags2[c2];
            match cmp_range_frags(f1, f2) {
                Some(Ordering::Less) => c1 += 1,
                Some(Ordering::Greater) => c2 += 1,
                _ => {
                    // There's no unique "next frag" -- either they are identical, or they overlap.
                    // So we're done.
                    return true;
                }
            }
        }
    }

    /// Does this sorted list of range fragments contain the given instruction point?
    pub(crate) fn contains_pt(&self, pt: InstPoint) -> bool {
        self.0
            .binary_search_by(|frag| {
                if pt < frag.first {
                    Ordering::Greater
                } else if pt >= frag.first && pt <= frag.last {
                    Ordering::Equal
                } else {
                    Ordering::Less
                }
            })
            .is_ok()
    }
}
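
// A small illustrative sketch (not part of the allocator itself) of the merge-style walk
// used by `overlaps` above, expressed over plain (first, last) integer intervals.  Each
// step either identifies a unique "next lowest" interval and advances past it, or finds
// two intervals that are unordered with respect to each other, which means they overlap.
#[cfg(test)]
fn sorted_interval_lists_overlap(xs: &[(u32, u32)], ys: &[(u32, u32)]) -> bool {
    let (mut i, mut j) = (0, 0);
    while i < xs.len() && j < ys.len() {
        let (a_first, a_last) = xs[i];
        let (b_first, b_last) = ys[j];
        if a_last < b_first {
            i += 1; // xs[i] lies entirely before ys[j]
        } else if b_last < a_first {
            j += 1; // ys[j] lies entirely before xs[i]
        } else {
            return true; // neither precedes the other: they overlap
        }
    }
    false
}

#[test]
fn test_sorted_interval_lists_overlap() {
    let a = [(1, 3), (8, 9)];
    let b = [(4, 7), (10, 12)];
    let c = [(9, 10)];
    assert!(!sorted_interval_lists_overlap(&a, &b));
    assert!(sorted_interval_lists_overlap(&a, &c));
    assert!(sorted_interval_lists_overlap(&b, &c));
}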

//=============================================================================
// Representing spill costs.  A spill cost can either be infinite, in which
// case the associated VirtualRange may not be spilled, because it's already a
// spill/reload range; or it can be finite, in which case it must be a 32-bit
// floating point number which is (in the IEEE754 meaning of the terms)
// non-infinite, non-NaN and non-negative.  In fact it's meaningless for a VLR
// to have a zero spill cost (how could that really be the case?), but we
// allow it here for convenience.

#[derive(Copy, Clone)]
pub enum SpillCost {
    Infinite,    // Infinite, positive
    Finite(f32), // Finite, non-negative
}

impl fmt::Debug for SpillCost {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SpillCost::Infinite => write!(fmt, "INFINITY"),
            SpillCost::Finite(c) => write!(fmt, "{:<.3}", c),
        }
    }
}

impl SpillCost {
    #[inline(always)]
    pub fn zero() -> Self {
        SpillCost::Finite(0.0)
    }
    #[inline(always)]
    pub fn infinite() -> Self {
        SpillCost::Infinite
    }
    #[inline(always)]
    pub fn finite(cost: f32) -> Self {
        // "`is_normal` returns true if the number is neither zero, infinite,
        // subnormal, or NaN."
        assert!(cost.is_normal() || cost == 0.0);
        // And also it can't be negative.
        assert!(cost >= 0.0);
        // Somewhat arbitrarily ..
        assert!(cost < 1e18);
        SpillCost::Finite(cost)
    }
    #[inline(always)]
    pub fn is_zero(&self) -> bool {
        match self {
            SpillCost::Infinite => false,
            SpillCost::Finite(c) => *c == 0.0,
        }
    }
    #[inline(always)]
    pub fn is_infinite(&self) -> bool {
        match self {
            SpillCost::Infinite => true,
            SpillCost::Finite(_) => false,
        }
    }
    #[inline(always)]
    pub fn is_finite(&self) -> bool {
        !self.is_infinite()
    }
    #[inline(always)]
    pub fn is_less_than(&self, other: &Self) -> bool {
        match (self, other) {
            // Dubious .. both are infinity
            (SpillCost::Infinite, SpillCost::Infinite) => false,
            // finite < inf
            (SpillCost::Finite(_), SpillCost::Infinite) => true,
            // inf is not < finite
            (SpillCost::Infinite, SpillCost::Finite(_)) => false,
            // straightforward
            (SpillCost::Finite(c1), SpillCost::Finite(c2)) => c1 < c2,
        }
    }
    #[inline(always)]
    pub fn add(&mut self, other: &Self) {
        match (*self, other) {
            (SpillCost::Finite(c1), SpillCost::Finite(c2)) => {
                // The 10^18 limit above gives us a lot of headroom here, since the
                // maximum f32 is around 3.4 * 10^38.
                *self = SpillCost::Finite(c1 + c2);
            }
            (_, _) => {
                // All other cases produce an infinity.
                *self = SpillCost::Infinite;
            }
        }
    }
}
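
// A small illustrative check (not part of the original test suite) of the SpillCost
// arithmetic above: adding two finite costs stays finite, while adding infinity to
// anything saturates to infinity, which is what keeps spill/reload ranges unspillable.
#[test]
fn test_spill_cost_add_saturates() {
    let mut c = SpillCost::finite(3.0);
    c.add(&SpillCost::finite(2.0));
    assert!(c.is_finite() && !c.is_zero());
    assert!(SpillCost::zero().is_less_than(&c));
    c.add(&SpillCost::infinite());
    assert!(c.is_infinite());
    assert!(!c.is_less_than(&SpillCost::infinite()));
}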

//=============================================================================
// Representing and printing live ranges.  These are represented by two
// different but closely related types, RealRange and VirtualRange.

// RealRanges are live ranges for real regs (RealRegs).  VirtualRanges are
// live ranges for virtual regs (VirtualRegs).  VirtualRanges are the
// fundamental unit of allocation.
//
// A RealRange pairs a RealReg with a vector of RangeFragIxs in which it is
// live.  The RangeFragIxs are indices into some vector of RangeFrags (a
// "fragment environment", 'fenv'), which is not specified here.  They are
// sorted so as to give ascending order to the RangeFrags which they refer to.
//
// A VirtualRange pairs a VirtualReg with a vector of RangeFrags in which it
// is live.  Same scheme as for a RealRange, except it avoids the overhead of
// having to indirect into the fragment environment.
//
// VirtualRanges also contain metrics:
//
// * `size` is the number of instructions in total spanned by the LR.  It must
//   not be zero.
//
// * `total_cost` is an abstractified measure of the cost of the LR.  Each
//   basic block in which the range exists gives a contribution to the
//   `total_cost`: the number of times the register is mentioned in that
//   block, multiplied by the estimated execution frequency of the block.
//
// * `spill_cost` is an abstractified measure of the cost of spilling the LR,
//   and is the `total_cost` divided by the `size` (see the worked example
//   below).  The only constraint (w.r.t. correctness) is that normal LRs have
//   a finite spill cost, whilst an infinite cost is reserved for live ranges
//   created for spills and reloads.  This is needed to guarantee that
//   allocation can always succeed in the worst case, in which all of the
//   original live ranges of the program are spilled.
//
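// For example (illustrative numbers only): a VirtualRange spanning 8 instructions in
// total, mentioned 3 times in a block with estimated execution frequency 10 and twice
// in a block with frequency 1, has `total_cost` = 3*10 + 2*1 = 32 and therefore
// `spill_cost` = 32/8 = 4.0.
//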
// RealRanges don't carry any metrics info since we are not trying to allocate
// them.  We merely need to work around them.
//
// I find it helpful to think of a live range, both RealRange and
// VirtualRange, as a "renaming equivalence class".  That is, if you rename
// `reg` at some point inside `sorted_frags`, then you must rename *all*
// occurrences of `reg` inside `sorted_frags`, since otherwise the program will
// no longer work.

#[derive(Clone)]
pub struct RealRange {
    pub rreg: RealReg,
    pub(crate) sorted_frags: SortedRangeFragIxs,
    pub is_ref: bool,
}

impl fmt::Debug for RealRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(RR: {:?}{}, {:?})",
            self.rreg,
            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
}

impl RealRange {
    pub fn show_with_rru(&self, univ: &RealRegUniverse) -> String {
        format!(
            "(RR: {}{}, {:?})",
            self.rreg.to_reg().show_with_rru(univ),
            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
}

// VirtualRanges are live ranges for virtual regs (VirtualRegs).  Unlike a
// RealRange, a VirtualRange carries metrics info and also the identity of the
// RealReg to which it eventually got allocated.  (Or, in the backtracking
// allocator, the identity of the RealReg to which it is *currently* assigned;
// that may be undone at some later point.)

#[derive(Clone)]
pub struct VirtualRange {
    pub(crate) vreg: VirtualReg,
    pub(crate) rreg: Option<RealReg>,
    pub(crate) sorted_frags: SortedRangeFrags,
    pub(crate) is_ref: bool,
    pub(crate) size: u16,
    pub(crate) total_cost: u32,
    pub(crate) spill_cost: SpillCost, // == total_cost / size
}

impl VirtualRange {
    pub fn overlaps(&self, other: &Self) -> bool {
        self.sorted_frags.overlaps(&other.sorted_frags)
    }
}

impl fmt::Debug for VirtualRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(VR: {:?}{},",
            self.vreg,
            if self.is_ref { " REF" } else { "" }
        )?;
        if self.rreg.is_some() {
            write!(fmt, " -> {:?}", self.rreg.unwrap())?;
        }
        write!(
            fmt,
            " sz={}, tc={}, sc={:?}, {:?})",
            self.size, self.total_cost, self.spill_cost, self.sorted_frags
        )
    }
}

//=============================================================================
// Some auxiliary/miscellaneous data structures that are useful: RegToRangesMaps

/// Mappings from RealRegs and VirtualRegs to the sets of RealRanges and VirtualRanges that belong
/// to them.  These are needed for BT's coalescing analysis and for the dataflow analysis that
/// supports reftype handling.
pub(crate) struct RegToRangesMaps {
    /// This maps RealReg indices to the set of RealRangeIxs for that RealReg.  Valid indices are
    /// real register indices for all non-sanitised real regs; that is,
    /// 0..RealRegUniverse::allocable.  The Vecs of RealRangeIxs are duplicate-free.
    ///
    /// The SmallVec capacity of 6 was chosen after quite some profiling of Cranelift/x64/newBE
    /// compiling ZenGarden.wasm -- a huge input, with many relatively small functions.  Profiling
    /// was performed in August 2020, using Valgrind/DHAT.
    pub(crate) rreg_to_rlrs_map: Vec</*real reg ix, */ SmallVec<[RealRangeIx; 6]>>,

    /// This maps VirtualReg indices to the set of VirtualRangeIxs for that VirtualReg.  Valid
    /// indices are 0..Function::get_num_vregs().  For functions mostly translated from SSA,
    /// most VirtualRegs will have just one VirtualRange, and there are a lot of VirtualRegs in
    /// general.  So SmallVec is a definite benefit here.
    pub(crate) vreg_to_vlrs_map: Vec</*virtual reg ix, */ SmallVec<[VirtualRangeIx; 3]>>,

    /// As an optimisation heuristic for BT's coalescing analysis, these indicate which
    /// real/virtual registers have "many" `RangeFrag`s in their live ranges, where "many" is
    /// defined by the `many_frags_thresh` field.  This is not important for the overall
    /// allocation result or correctness: it merely allows the coalescing analysis to switch
    /// between two search strategies, one of which is fast for regs with few `RangeFrag`s (the
    /// vast majority) and the other of which has better asymptotic behaviour for regs with many
    /// `RangeFrag`s (in order to keep out of trouble on some pathological inputs).  These vectors
    /// are duplicate-free but the elements may be in an arbitrary order.
    pub(crate) rregs_with_many_frags: Vec<u32 /*RealReg index*/>,
    /// Same as above, for virtual registers.
    pub(crate) vregs_with_many_frags: Vec<u32 /*VirtualReg index*/>,

    /// This indicates what the threshold is actually set to.  A register is listed in
    /// `{r,v}regs_with_many_frags` if it has `many_frags_thresh` or more RangeFrags.
    pub(crate) many_frags_thresh: usize,
}
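
// A hedged sketch (illustrative only; not the allocator's actual construction code, and
// `build_reverse_map` is a hypothetical name) of how a reverse map shaped like
// `vreg_to_vlrs_map` can be built in a single pass, given, for each virtual-range index,
// the index of the virtual register that owns it.  Plain `u32` indices stand in for
// `VirtualRangeIx`/`VirtualReg`.
#[cfg(test)]
fn build_reverse_map(num_vregs: usize, vlr_owners: &[u32]) -> Vec<SmallVec<[u32; 3]>> {
    let mut map: Vec<SmallVec<[u32; 3]>> = vec![SmallVec::new(); num_vregs];
    for (vlr_ix, &vreg_ix) in vlr_owners.iter().enumerate() {
        // Each virtual range is visited exactly once, so the per-reg vectors stay duplicate-free.
        map[vreg_ix as usize].push(vlr_ix as u32);
    }
    map
}

#[test]
fn test_build_reverse_map() {
    // Virtual ranges 0, 1, 2 belong to vregs 1, 0, 1 respectively.
    let map = build_reverse_map(2, &[1, 0, 1]);
    assert_eq!(&map[0][..], &[1u32][..]);
    assert_eq!(&map[1][..], &[0u32, 2][..]);
}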

/// `MoveInfoElem` holds info about the two registers connected by a move: the source and
/// destination of the move, the instruction performing the move, and the estimated execution
/// frequency of the containing block.
pub(crate) struct MoveInfoElem {
    pub(crate) dst: Reg,
    pub(crate) src: Reg,
    pub(crate) iix: InstIx,
    pub(crate) est_freq: u32,
}

/// Vector of `MoveInfoElem`, presented in no particular order, but duplicate-free in that each
/// move instruction will be listed only once.
pub(crate) struct MoveInfo(Vec<MoveInfoElem>);

impl MoveInfo {
    pub(crate) fn new(move_info: Vec<MoveInfoElem>) -> Self {
        Self(move_info)
    }
}

impl Deref for MoveInfo {
    type Target = Vec<MoveInfoElem>;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

// Something that can be either a VirtualRangeIx or a RealRangeIx, whilst still being 32 bits
// (by stealing one bit from those spaces).  Note that the resulting thing no longer denotes a
// contiguous index space, and so it has a name that indicates it is an identifier rather than
// an index.

#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub struct RangeId {
    // 1 X--(31)--X is a RealRangeIx with value X--(31)--X
    // 0 X--(31)--X is a VirtualRangeIx with value X--(31)--X
    bits: u32,
}

impl RangeId {
    #[inline(always)]
    pub fn new_real(rlrix: RealRangeIx) -> Self {
        let n = rlrix.get();
        assert!(n <= 0x7FFF_FFFF);
        Self {
            bits: n | 0x8000_0000,
        }
    }
    #[inline(always)]
    pub fn new_virtual(vlrix: VirtualRangeIx) -> Self {
        let n = vlrix.get();
        assert!(n <= 0x7FFF_FFFF);
        Self { bits: n }
    }
    #[inline(always)]
    pub fn is_real(self) -> bool {
        self.bits & 0x8000_0000 != 0
    }
    #[allow(dead_code)]
    #[inline(always)]
    pub fn is_virtual(self) -> bool {
        self.bits & 0x8000_0000 == 0
    }
    #[inline(always)]
    pub fn to_real(self) -> RealRangeIx {
        assert!(self.bits & 0x8000_0000 != 0);
        RealRangeIx::new(self.bits & 0x7FFF_FFFF)
    }
    #[inline(always)]
    pub fn to_virtual(self) -> VirtualRangeIx {
        assert!(self.bits & 0x8000_0000 == 0);
        VirtualRangeIx::new(self.bits)
    }
    #[inline(always)]
    pub fn invalid_value() -> Self {
        // Real, and implausibly huge
        Self { bits: 0xFFFF_FFFF }
    }
}
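
// A small illustrative check (not part of the original test suite) that the tag-bit encoding
// above round-trips: the top bit distinguishes real from virtual, and the low 31 bits carry
// the original index through unchanged.
#[test]
fn test_range_id_roundtrip() {
    let v = RangeId::new_virtual(VirtualRangeIx::new(42));
    assert!(v.is_virtual() && !v.is_real());
    assert!(v.to_virtual().get() == 42);

    let r = RangeId::new_real(RealRangeIx::new(7));
    assert!(r.is_real() && !r.is_virtual());
    assert!(r.to_real().get() == 7);

    // The invalid sentinel is encoded as a (hugely out-of-range) real index.
    assert!(RangeId::invalid_value().is_real());
}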

impl fmt::Debug for RangeId {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        if self.is_real() {
            self.to_real().fmt(fmt)
        } else {
            self.to_virtual().fmt(fmt)
        }
    }
}

//=============================================================================
// Test cases

// sewardj 2020Mar04: these are commented out for now, as they no longer
// compile.  They may be useful later though, once BT acquires an interval
// tree implementation for its CommitmentMap.

/*
#[test]
fn test_sorted_frag_ranges() {
  // Create a RangeFrag and RangeFragIx from two InstPoints.
  fn gen_fix(
    fenv: &mut TypedIxVec<RangeFragIx, RangeFrag>, first: InstPoint,
    last: InstPoint,
  ) -> RangeFragIx {
    assert!(first <= last);
    let res = RangeFragIx::new(fenv.len() as u32);
    let frag = RangeFrag {
      bix: BlockIx::new(123),
      kind: RangeFragKind::Local,
      first,
      last,
      count: 0,
    };
    fenv.push(frag);
    res
  }

  fn get_range_frag(
    fenv: &TypedIxVec<RangeFragIx, RangeFrag>, fix: RangeFragIx,
  ) -> &RangeFrag {
    &fenv[fix]
  }

  // Structural equality, at least.  Not equality in the sense of
  // dereferencing the contained RangeFragIxes.
  fn sorted_range_eq(
    fixs1: &SortedRangeFragIxs, fixs2: &SortedRangeFragIxs,
  ) -> bool {
    if fixs1.frag_ixs.len() != fixs2.frag_ixs.len() {
      return false;
    }
    for (mf1, mf2) in fixs1.frag_ixs.iter().zip(&fixs2.frag_ixs) {
      if mf1 != mf2 {
        return false;
      }
    }
    true
  }

  let iix3 = InstIx::new(3);
  let iix4 = InstIx::new(4);
  let iix5 = InstIx::new(5);
  let iix6 = InstIx::new(6);
  let iix7 = InstIx::new(7);
  let iix10 = InstIx::new(10);
  let iix12 = InstIx::new(12);

  let fp_3u = InstPoint::new_use(iix3);
  let fp_3d = InstPoint::new_def(iix3);

  let fp_4u = InstPoint::new_use(iix4);

  let fp_5u = InstPoint::new_use(iix5);
  let fp_5d = InstPoint::new_def(iix5);

  let fp_6u = InstPoint::new_use(iix6);
  let fp_6d = InstPoint::new_def(iix6);

  let fp_7u = InstPoint::new_use(iix7);
  let fp_7d = InstPoint::new_def(iix7);

  let fp_10u = InstPoint::new_use(iix10);
  let fp_12u = InstPoint::new_use(iix12);

  let mut fenv = TypedIxVec::<RangeFragIx, RangeFrag>::new();

  let fix_3u = gen_fix(&mut fenv, fp_3u, fp_3u);
  let fix_3d = gen_fix(&mut fenv, fp_3d, fp_3d);
  let fix_4u = gen_fix(&mut fenv, fp_4u, fp_4u);
  let fix_3u_5u = gen_fix(&mut fenv, fp_3u, fp_5u);
  let fix_3d_5d = gen_fix(&mut fenv, fp_3d, fp_5d);
  let fix_3d_5u = gen_fix(&mut fenv, fp_3d, fp_5u);
  let fix_3u_5d = gen_fix(&mut fenv, fp_3u, fp_5d);
  let fix_6u_6d = gen_fix(&mut fenv, fp_6u, fp_6d);
  let fix_7u_7d = gen_fix(&mut fenv, fp_7u, fp_7d);
  let fix_10u = gen_fix(&mut fenv, fp_10u, fp_10u);
  let fix_12u = gen_fix(&mut fenv, fp_12u, fp_12u);

  // Boundary checks for point ranges, 3u vs 3d
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u),
      get_range_frag(&fenv, fix_3u)
    ) == Some(Ordering::Equal)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Less)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_3u)
    ) == Some(Ordering::Greater)
  );

  // Boundary checks for point ranges, 3d vs 4u
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Equal)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_4u)
    ) == Some(Ordering::Less)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_4u),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Greater)
  );

  // Partially overlapping
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d_5d),
      get_range_frag(&fenv, fix_3u_5u)
    ) == None
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u_5u),
      get_range_frag(&fenv, fix_3d_5d)
    ) == None
  );

  // Completely overlapping: one contained within the other
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d_5u),
      get_range_frag(&fenv, fix_3u_5d)
    ) == None
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u_5d),
      get_range_frag(&fenv, fix_3d_5u)
    ) == None
  );

  // Create a SortedRangeFragIxs from a bunch of RangeFrag indices
  fn new_sorted_frag_ranges(
    fenv: &TypedIxVec<RangeFragIx, RangeFrag>, frags: &Vec<RangeFragIx>,
  ) -> SortedRangeFragIxs {
    SortedRangeFragIxs::new(&frags, fenv)
  }

  // Construction tests
  // These fail due to overlap
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_3u, fix_3u_3u]);
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_5u, fix_3d_5d]);

  // These fail due to not being in order
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_4u_4u, fix_3u_3u]);

  // Simple non-overlap tests for add()

  let smf_empty = new_sorted_frag_ranges(&fenv, &vec![]);
  let smf_6_7_10 =
    new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d, fix_10u]);
  let smf_3_12 = new_sorted_frag_ranges(&fenv, &vec![fix_3u, fix_12u]);
  let smf_3_6_7_10_12 = new_sorted_frag_ranges(
    &fenv,
    &vec![fix_3u, fix_6u_6d, fix_7u_7d, fix_10u, fix_12u],
  );
  let mut tmp;

  tmp = smf_empty.clone();
  tmp.add(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_3_12.clone();
  tmp.add(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_empty.clone();
  tmp.add(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_6_7_10.clone();
  tmp.add(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));

  tmp = smf_3_12.clone();
  tmp.add(&smf_6_7_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));

  // Tests for can_add()
  assert!(true == smf_empty.can_add(&smf_empty, &fenv));
  assert!(true == smf_empty.can_add(&smf_3_12, &fenv));
  assert!(true == smf_3_12.can_add(&smf_empty, &fenv));
  assert!(false == smf_3_12.can_add(&smf_3_12, &fenv));

  assert!(true == smf_6_7_10.can_add(&smf_3_12, &fenv));

  assert!(true == smf_3_12.can_add(&smf_6_7_10, &fenv));

  // Tests for del()
  let smf_6_7 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d]);
  let smf_6_10 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_10u]);
  let smf_7 = new_sorted_frag_ranges(&fenv, &vec![fix_7u_7d]);
  let smf_10 = new_sorted_frag_ranges(&fenv, &vec![fix_10u]);

  tmp = smf_empty.clone();
  tmp.del(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_3_12.clone();
  tmp.del(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_empty.clone();
  tmp.del(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_7_10));

  tmp = smf_3_12.clone();
  tmp.del(&smf_6_7_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_6_7, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_10));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_7));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_7, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_10));

  // Tests for can_add_if_we_first_del()
  let smf_10_12 = new_sorted_frag_ranges(&fenv, &vec![fix_10u, fix_12u]);

  assert!(
    true
      == smf_6_7_10
        .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_3_12, &fenv)
  );

  assert!(
    false
      == smf_6_7_10
        .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_7, &fenv)
  );
}
*/