regalloc/data_structures.rs
1//! Data structures for the whole crate.
2
3use rustc_hash::FxHashMap;
4use rustc_hash::FxHashSet;
5use smallvec::SmallVec;
6
7use std::cmp::Ordering;
8use std::collections::VecDeque;
9use std::fmt;
10use std::hash::Hash;
11use std::marker::PhantomData;
12use std::ops::{Deref, DerefMut, Index, IndexMut};
13use std::slice::{Iter, IterMut};
14
15use crate::{Function, RegUsageMapper};
16
17#[cfg(feature = "enable-serde")]
18use serde::{Deserialize, Serialize};
19
20//=============================================================================
21// Queues
22
23pub type Queue<T> = VecDeque<T>;
24
25//=============================================================================
26// Maps
27
28// NOTE: plain HashMap is nondeterministic, even in a single-threaded
29// scenario, which can make debugging code that uses it really confusing. So
30// we use FxHashMap instead, as it *is* deterministic, and, allegedly, faster
31// too.
32pub type Map<K, V> = FxHashMap<K, V>;
33
34//=============================================================================
35// Sets of things
36
37// Same comment as above for FxHashMap.
38#[derive(Clone)]
39#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
40pub struct Set<T: Eq + Hash> {
41 set: FxHashSet<T>,
42}
43
44impl<T: Eq + Ord + Hash + Copy + fmt::Debug> Set<T> {
45 #[inline(never)]
46 pub fn empty() -> Self {
47 Self {
48 set: FxHashSet::<T>::default(),
49 }
50 }
51
52 #[inline(never)]
53 pub fn unit(item: T) -> Self {
54 let mut s = Self::empty();
55 s.insert(item);
56 s
57 }
58
59 #[inline(never)]
60 pub fn two(item1: T, item2: T) -> Self {
61 let mut s = Self::empty();
62 s.insert(item1);
63 s.insert(item2);
64 s
65 }
66
67 #[inline(never)]
68 pub fn card(&self) -> usize {
69 self.set.len()
70 }
71
72 #[inline(never)]
73 pub fn insert(&mut self, item: T) {
74 self.set.insert(item);
75 }
76
77 #[inline(never)]
78 pub fn delete(&mut self, item: T) {
79 self.set.remove(&item);
80 }
81
82 #[inline(never)]
83 pub fn is_empty(&self) -> bool {
84 self.set.is_empty()
85 }
86
87 #[inline(never)]
88 pub fn contains(&self, item: T) -> bool {
89 self.set.contains(&item)
90 }
91
92 #[inline(never)]
93 pub fn intersect(&mut self, other: &Self) {
94 let mut res = FxHashSet::<T>::default();
95 for item in self.set.iter() {
96 if other.set.contains(item) {
97 res.insert(*item);
98 }
99 }
100 self.set = res;
101 }
102
103 #[inline(never)]
104 pub fn union(&mut self, other: &Self) {
105 for item in other.set.iter() {
106 self.set.insert(*item);
107 }
108 }
109
110 #[inline(never)]
111 pub fn remove(&mut self, other: &Self) {
112 for item in other.set.iter() {
113 self.set.remove(item);
114 }
115 }
116
117 #[inline(never)]
118 pub fn intersects(&self, other: &Self) -> bool {
119 !self.set.is_disjoint(&other.set)
120 }
121
122 #[inline(never)]
123 pub fn is_subset_of(&self, other: &Self) -> bool {
124 self.set.is_subset(&other.set)
125 }
126
127 #[inline(never)]
128 pub fn to_vec(&self) -> Vec<T> {
129 let mut res = Vec::<T>::new();
130 for item in self.set.iter() {
131 res.push(*item)
132 }
// Don't delete this sort. It makes the returned order deterministic, which callers rely on.
134 res.sort_unstable();
135 res
136 }
137
138 #[inline(never)]
139 pub fn from_vec(vec: Vec<T>) -> Self {
140 let mut res = Set::<T>::empty();
141 for x in vec {
142 res.insert(x);
143 }
144 res
145 }
146
147 #[inline(never)]
148 pub fn equals(&self, other: &Self) -> bool {
149 self.set == other.set
150 }
151
152 #[inline(never)]
153 pub fn retain<F>(&mut self, f: F)
154 where
155 F: FnMut(&T) -> bool,
156 {
157 self.set.retain(f)
158 }
159
160 #[inline(never)]
161 pub fn map<F, U>(&self, f: F) -> Set<U>
162 where
163 F: Fn(&T) -> U,
164 U: Eq + Ord + Hash + Copy + fmt::Debug,
165 {
166 Set {
167 set: self.set.iter().map(f).collect(),
168 }
169 }
170
171 #[inline(never)]
172 pub fn filter_map<F, U>(&self, f: F) -> Set<U>
173 where
174 F: Fn(&T) -> Option<U>,
175 U: Eq + Ord + Hash + Copy + fmt::Debug,
176 {
177 Set {
178 set: self.set.iter().filter_map(f).collect(),
179 }
180 }
181
182 pub fn clear(&mut self) {
183 self.set.clear();
184 }
185}
186
187impl<T: Eq + Ord + Hash + Copy + fmt::Debug> fmt::Debug for Set<T> {
188 #[inline(never)]
189 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
190 // Print the elements in some way which depends only on what is
191 // present in the set, and not on any other factor. In particular,
// <Debug for FxHashSet> has been observed to print the elements
// of a two-element set in both orders on different occasions.
194 let sorted_vec = self.to_vec();
195 let mut s = "{".to_string();
196 for i in 0..sorted_vec.len() {
197 if i > 0 {
198 s = s + &", ".to_string();
199 }
200 s = s + &format!("{:?}", &sorted_vec[i]);
201 }
202 s = s + &"}".to_string();
203 write!(fmt, "{}", s)
204 }
205}
206
207pub struct SetIter<'a, T> {
208 set_iter: std::collections::hash_set::Iter<'a, T>,
209}
210impl<T: Eq + Hash> Set<T> {
211 pub fn iter(&self) -> SetIter<T> {
212 SetIter {
213 set_iter: self.set.iter(),
214 }
215 }
216}
217impl<'a, T> Iterator for SetIter<'a, T> {
218 type Item = &'a T;
219 fn next(&mut self) -> Option<Self::Item> {
220 self.set_iter.next()
221 }
222}
223
224//=============================================================================
225// Iteration boilerplate for entities. The only purpose of this is to support
226// constructions of the form
227//
228// for ent in startEnt .dotdot( endPlus1Ent ) {
229// }
230//
231// until such time as `trait Step` is available in stable Rust. At that point
232// `fn dotdot` and all of the following can be removed, and the loops
233// rewritten using the standard syntax:
234//
235// for ent in startEnt .. endPlus1Ent {
236// }
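// For example, once `BlockIx` (defined further down) has had its boilerplate
// generated, a loop over the first `num_blocks` blocks can be written as
// follows (a small illustration; `num_blocks` is whatever block count applies):
//
//    for bix in BlockIx::new(0) .dotdot( BlockIx::new(num_blocks) ) {
//        ...
//    }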
237
238pub trait Zero {
239 fn zero() -> Self;
240}
241
242pub trait PlusN {
243 fn plus_n(&self, n: usize) -> Self;
244}
245
246#[derive(Clone, Copy)]
247#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
248pub struct Range<T> {
249 first: T,
250 len: usize,
251}
252
253impl<T: Copy + PartialOrd + PlusN> IntoIterator for Range<T> {
254 type Item = T;
255 type IntoIter = MyIterator<T>;
256 fn into_iter(self) -> Self::IntoIter {
257 MyIterator {
258 range: self,
259 next: self.first,
260 }
261 }
262}
263
264impl<T: Copy + Eq + Ord + PlusN> Range<T> {
265 /// Create a new range object.
266 pub fn new(from: T, len: usize) -> Range<T> {
267 Range { first: from, len }
268 }
269
270 pub fn start(&self) -> T {
271 self.first
272 }
273
274 pub fn first(&self) -> T {
275 assert!(self.len() > 0);
276 self.start()
277 }
278
279 pub fn last(&self) -> T {
280 assert!(self.len() > 0);
281 self.start().plus_n(self.len() - 1)
282 }
283
284 pub fn last_plus1(&self) -> T {
285 self.start().plus_n(self.len())
286 }
287
288 pub fn len(&self) -> usize {
289 self.len
290 }
291
292 pub fn contains(&self, t: T) -> bool {
293 t >= self.first && t < self.first.plus_n(self.len)
294 }
295}
296
297pub struct MyIterator<T> {
298 range: Range<T>,
299 next: T,
300}
301impl<T: Copy + PartialOrd + PlusN> Iterator for MyIterator<T> {
302 type Item = T;
303 fn next(&mut self) -> Option<Self::Item> {
304 if self.next >= self.range.first.plus_n(self.range.len) {
305 None
306 } else {
307 let res = Some(self.next);
308 self.next = self.next.plus_n(1);
309 res
310 }
311 }
312}
313
314//=============================================================================
315// Vectors where both the index and element types can be specified (and at
316// most 2^32-1 elems can be stored. What if this overflows?)
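// For example (a small illustrative sketch), a per-block counter table can be
// held as a TypedIxVec<BlockIx, u32>, so that it can only be indexed with a
// BlockIx and never with some unrelated index type:
//
//    let mut counts = TypedIxVec::<BlockIx, u32>::new();
//    counts.push(0);                  // entry for block b0
//    counts[BlockIx::new(0)] += 1;    // indexed by a BlockIx, not a raw usize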
317
318pub struct TypedIxVec<TyIx, Ty> {
319 vek: Vec<Ty>,
320 ty_ix: PhantomData<TyIx>,
321}
322
323impl<TyIx, Ty> TypedIxVec<TyIx, Ty>
324where
325 Ty: Clone,
326 TyIx: Copy + Eq + Ord + Zero + PlusN + Into<u32>,
327{
328 pub fn new() -> Self {
329 Self {
330 vek: Vec::new(),
331 ty_ix: PhantomData::<TyIx>,
332 }
333 }
334 pub fn from_vec(vek: Vec<Ty>) -> Self {
335 Self {
336 vek,
337 ty_ix: PhantomData::<TyIx>,
338 }
339 }
340 pub fn append(&mut self, other: &mut TypedIxVec<TyIx, Ty>) {
341 // FIXME what if this overflows?
342 self.vek.append(&mut other.vek);
343 }
344 pub fn iter(&self) -> Iter<Ty> {
345 self.vek.iter()
346 }
347 pub fn iter_mut(&mut self) -> IterMut<Ty> {
348 self.vek.iter_mut()
349 }
350 pub fn len(&self) -> u32 {
351 // FIXME what if this overflows?
352 self.vek.len() as u32
353 }
354 pub fn push(&mut self, item: Ty) {
355 // FIXME what if this overflows?
356 self.vek.push(item);
357 }
358 pub fn resize(&mut self, new_len: u32, value: Ty) {
359 self.vek.resize(new_len as usize, value);
360 }
361 pub fn reserve(&mut self, additional: usize) {
362 self.vek.reserve(additional);
363 }
364 pub fn elems(&self) -> &[Ty] {
365 &self.vek[..]
366 }
367 pub fn elems_mut(&mut self) -> &mut [Ty] {
368 &mut self.vek[..]
369 }
370 pub fn range(&self) -> Range<TyIx> {
371 Range::new(TyIx::zero(), self.len() as usize)
372 }
373 pub fn remove(&mut self, idx: TyIx) -> Ty {
374 self.vek.remove(idx.into() as usize)
375 }
376 pub fn sort_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
377 self.vek.sort_by(compare)
378 }
379 pub fn sort_unstable_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
380 self.vek.sort_unstable_by(compare)
381 }
382 pub fn clear(&mut self) {
383 self.vek.clear();
384 }
385}
386
387impl<TyIx, Ty> Index<TyIx> for TypedIxVec<TyIx, Ty>
388where
389 TyIx: Into<u32>,
390{
391 type Output = Ty;
392 fn index(&self, ix: TyIx) -> &Ty {
393 &self.vek[ix.into() as usize]
394 }
395}
396
397impl<TyIx, Ty> IndexMut<TyIx> for TypedIxVec<TyIx, Ty>
398where
399 TyIx: Into<u32>,
400{
401 fn index_mut(&mut self, ix: TyIx) -> &mut Ty {
402 &mut self.vek[ix.into() as usize]
403 }
404}
405
406impl<TyIx, Ty> Clone for TypedIxVec<TyIx, Ty>
407where
408 Ty: Clone,
409{
410 // This is only needed for debug printing.
411 fn clone(&self) -> Self {
412 Self {
413 vek: self.vek.clone(),
414 ty_ix: PhantomData::<TyIx>,
415 }
416 }
417}
418
419//=============================================================================
420
421macro_rules! generate_boilerplate {
422 ($TypeIx:ident, $Type:ident, $PrintingPrefix:expr) => {
423 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
424 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
425 // Firstly, the indexing type (TypeIx)
426 pub enum $TypeIx {
427 $TypeIx(u32),
428 }
429 impl $TypeIx {
430 #[allow(dead_code)]
431 #[inline(always)]
432 pub fn new(n: u32) -> Self {
433 debug_assert!(n != u32::max_value());
434 Self::$TypeIx(n)
435 }
436 #[allow(dead_code)]
437 #[inline(always)]
438 pub const fn max_value() -> Self {
439 Self::$TypeIx(u32::max_value() - 1)
440 }
441 #[allow(dead_code)]
442 #[inline(always)]
443 pub const fn min_value() -> Self {
444 Self::$TypeIx(u32::min_value())
445 }
446 #[allow(dead_code)]
447 #[inline(always)]
448 pub const fn invalid_value() -> Self {
449 Self::$TypeIx(u32::max_value())
450 }
451 #[allow(dead_code)]
452 #[inline(always)]
453 pub fn is_valid(self) -> bool {
454 self != Self::invalid_value()
455 }
456 #[allow(dead_code)]
457 #[inline(always)]
458 pub fn is_invalid(self) -> bool {
459 self == Self::invalid_value()
460 }
461 #[allow(dead_code)]
462 #[inline(always)]
463 pub fn get(self) -> u32 {
464 debug_assert!(self.is_valid());
465 match self {
466 $TypeIx::$TypeIx(n) => n,
467 }
468 }
469 #[allow(dead_code)]
470 #[inline(always)]
471 pub fn plus(self, delta: u32) -> $TypeIx {
472 debug_assert!(self.is_valid());
473 $TypeIx::$TypeIx(self.get() + delta)
474 }
475 #[allow(dead_code)]
476 #[inline(always)]
477 pub fn minus(self, delta: u32) -> $TypeIx {
478 debug_assert!(self.is_valid());
479 $TypeIx::$TypeIx(self.get() - delta)
480 }
481 #[allow(dead_code)]
482 pub fn dotdot(&self, last_plus1: $TypeIx) -> Range<$TypeIx> {
483 debug_assert!(self.is_valid());
484 let len = (last_plus1.get() - self.get()) as usize;
485 Range::new(*self, len)
486 }
487 }
488 impl fmt::Debug for $TypeIx {
489 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
490 if self.is_invalid() {
491 write!(fmt, "{}<NONE>", $PrintingPrefix)
492 } else {
493 write!(fmt, "{}{}", $PrintingPrefix, &self.get())
494 }
495 }
496 }
497 impl PlusN for $TypeIx {
498 #[inline(always)]
499 fn plus_n(&self, n: usize) -> Self {
500 debug_assert!(self.is_valid());
501 self.plus(n as u32)
502 }
503 }
504 impl Into<u32> for $TypeIx {
505 #[inline(always)]
506 fn into(self) -> u32 {
507 debug_assert!(self.is_valid());
508 self.get()
509 }
510 }
511 impl Zero for $TypeIx {
512 #[inline(always)]
513 fn zero() -> Self {
514 $TypeIx::new(0)
515 }
516 }
517 };
518}
519
520generate_boilerplate!(InstIx, Inst, "i");
521
522generate_boilerplate!(BlockIx, Block, "b");
523
524generate_boilerplate!(RangeFragIx, RangeFrag, "f");
525
526generate_boilerplate!(VirtualRangeIx, VirtualRange, "vr");
527
528generate_boilerplate!(RealRangeIx, RealRange, "rr");
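// After expansion, each of these index types behaves like a thin u32 newtype.
// A small illustration of the generated API:
//
//    let i0 = InstIx::new(0);
//    let i4 = i0.plus(4);                       // InstIx 4
//    assert_eq!(format!("{:?}", i4), "i4");     // printed with the "i" prefix
//    for iix in i0.dotdot(i4) { /* visits i0, i1, i2, i3 */ }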
529
530impl<TyIx, Ty: fmt::Debug> fmt::Debug for TypedIxVec<TyIx, Ty> {
531 // This is something of a hack in the sense that it doesn't show the
532 // indices, but oh well ..
533 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
534 write!(fmt, "{:?}", self.vek)
535 }
536}
537
538//=============================================================================
539// Definitions of register classes, registers and stack slots, and printing
540// thereof. Note that this register class definition is meant to be
541// architecture-independent: it simply captures common integer/float/vector
542// types that machines are likely to use. TODO: investigate whether we need a
543// more flexible register-class definition mechanism.
544
545#[derive(PartialEq, Eq, Debug, Clone, Copy)]
546#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
547pub enum RegClass {
548 I32 = 0,
549 F32 = 1,
550 I64 = 2,
551 F64 = 3,
552 V128 = 4,
553 INVALID = 5,
554}
555
556/// The number of register classes that exist.
557/// N.B.: must be <= 7 (fit into 3 bits) for 32-bit VReg/RReg packed format!
558pub const NUM_REG_CLASSES: usize = 5;
559
560impl RegClass {
561 /// Convert a register class to a u32 index.
562 #[inline(always)]
563 pub const fn rc_to_u32(self) -> u32 {
564 self as u32
565 }
566 /// Convert a register class to a usize index.
567 #[inline(always)]
568 pub const fn rc_to_usize(self) -> usize {
569 self as usize
570 }
571 /// Construct a register class from a u32.
572 #[inline(always)]
573 pub fn rc_from_u32(rc: u32) -> RegClass {
574 match rc {
575 0 => RegClass::I32,
576 1 => RegClass::F32,
577 2 => RegClass::I64,
578 3 => RegClass::F64,
579 4 => RegClass::V128,
580 _ => panic!("RegClass::rc_from_u32"),
581 }
582 }
583
584 pub fn short_name(self) -> &'static str {
585 match self {
586 RegClass::I32 => "I",
587 RegClass::I64 => "J",
588 RegClass::F32 => "F",
589 RegClass::F64 => "D",
590 RegClass::V128 => "V",
591 RegClass::INVALID => panic!("RegClass::short_name"),
592 }
593 }
594
595 pub fn long_name(self) -> &'static str {
596 match self {
597 RegClass::I32 => "I32",
RegClass::I64 => "I64",
RegClass::F32 => "F32",
RegClass::F64 => "F64",
601 RegClass::V128 => "V128",
602 RegClass::INVALID => panic!("RegClass::long_name"),
603 }
604 }
605}
606
607// Reg represents both real and virtual registers. For compactness and speed,
608// these fields are packed into a single u32. The format is:
609//
610// Virtual Reg: 1 rc:3 index:28
611// Real Reg: 0 rc:3 uu:12 enc:8 index:8
612//
613// `rc` is the register class. `uu` means "unused". `enc` is the hardware
614// encoding for the reg. `index` is a zero based index which has the
615// following meanings:
616//
617// * for a Virtual Reg, `index` is just the virtual register number.
618// * for a Real Reg, `index` is the entry number in the associated
619// `RealRegUniverse`.
620//
621// This scheme gives us:
622//
623// * a compact (32-bit) representation for registers
624// * fast equality tests for registers
625// * ability to handle up to 2^28 (268.4 million) virtual regs per function
626// * ability to handle up to 8 register classes
627// * ability to handle targets with up to 256 real registers
628// * ability to emit instructions containing real regs without having to
629// look up encodings in any side tables, since a real reg carries its
630// encoding
631// * efficient bitsets and arrays of virtual registers, since each has a
632// zero-based index baked in
633// * efficient bitsets and arrays of real registers, for the same reason
634//
635// This scheme makes it impossible to represent overlapping register classes,
636// but that doesn't seem important. AFAIK only ARM32 VFP/Neon has that.
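// A worked example of the packing described above (the particular values are
// chosen only for illustration): a virtual reg of class F64 (rc = 3) with
// index 7 is encoded as
//
//    1 011 0000000000000000000000000111           = 0xB000_0007
//
// and a real reg of class I64 (rc = 2) with hardware encoding 13 and universe
// index 4 is encoded as
//
//    0 010 000000000000 00001101 00000100         = 0x2000_0D04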
637
638#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
639#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
640pub struct Reg {
641 bits: u32,
642}
643
644const INVALID_REG: u32 = 0xffffffff;
645
646impl Reg {
647 #[inline(always)]
648 pub fn is_virtual(self) -> bool {
649 self.is_valid() && (self.bits & 0x8000_0000) != 0
650 }
651 #[inline(always)]
652 pub fn is_real(self) -> bool {
653 self.is_valid() && (self.bits & 0x8000_0000) == 0
654 }
655 pub const fn new_real(rc: RegClass, enc: u8, index: u8) -> Self {
656 let n = (0 << 31) | (rc.rc_to_u32() << 28) | ((enc as u32) << 8) | ((index as u32) << 0);
657 Reg { bits: n }
658 }
659 pub fn new_virtual(rc: RegClass, index: u32) -> Self {
660 if index >= (1 << 28) {
661 panic!("new_virtual(): index too large");
662 }
663 let n = (1 << 31) | (rc.rc_to_u32() << 28) | (index << 0);
664 Reg { bits: n }
665 }
666 pub const fn invalid() -> Reg {
667 Reg { bits: INVALID_REG }
668 }
669 #[inline(always)]
670 pub fn is_invalid(self) -> bool {
671 self.bits == INVALID_REG
672 }
673 #[inline(always)]
674 pub fn is_valid(self) -> bool {
675 !self.is_invalid()
676 }
677 pub fn is_virtual_or_invalid(self) -> bool {
678 self.is_virtual() || self.is_invalid()
679 }
680 pub fn is_real_or_invalid(self) -> bool {
681 self.is_real() || self.is_invalid()
682 }
683 #[inline(always)]
684 pub fn get_class(self) -> RegClass {
685 debug_assert!(self.is_valid());
686 RegClass::rc_from_u32((self.bits >> 28) & 0x7)
687 }
688 #[inline(always)]
689 pub fn get_index(self) -> usize {
690 debug_assert!(self.is_valid());
691 // Return type is usize because typically we will want to use the
692 // result for indexing into a Vec
693 if self.is_virtual() {
694 (self.bits & ((1 << 28) - 1)) as usize
695 } else {
696 (self.bits & ((1 << 8) - 1)) as usize
697 }
698 }
699 #[inline(always)]
700 pub fn get_index_u32(self) -> u32 {
701 debug_assert!(self.is_valid());
702 if self.is_virtual() {
703 self.bits & ((1 << 28) - 1)
704 } else {
705 self.bits & ((1 << 8) - 1)
706 }
707 }
708 pub fn get_hw_encoding(self) -> u8 {
709 debug_assert!(self.is_valid());
710 if self.is_virtual() {
711 panic!("Virtual register does not have a hardware encoding")
712 } else {
713 ((self.bits >> 8) & ((1 << 8) - 1)) as u8
714 }
715 }
716 pub fn as_virtual_reg(self) -> Option<VirtualReg> {
717 // Allow invalid virtual regs as well.
718 if self.is_virtual_or_invalid() {
719 Some(VirtualReg { reg: self })
720 } else {
721 None
722 }
723 }
724 pub fn as_real_reg(self) -> Option<RealReg> {
725 // Allow invalid real regs as well.
726 if self.is_real_or_invalid() {
727 Some(RealReg { reg: self })
728 } else {
729 None
730 }
731 }
732 pub fn show_with_rru(self, univ: &RealRegUniverse) -> String {
733 if self.is_real() && self.get_index() < univ.regs.len() {
734 univ.regs[self.get_index()].1.clone()
735 } else if self.is_valid() {
736 format!("{:?}", self)
737 } else {
738 "rINVALID".to_string()
739 }
740 }
741}
742
743impl fmt::Debug for Reg {
744 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
745 if self.is_valid() {
746 write!(
747 fmt,
748 "{}{}{}",
749 if self.is_virtual() { "v" } else { "r" },
750 self.get_index(),
751 self.get_class().short_name(),
752 )
753 } else {
754 write!(fmt, "rINVALID")
755 }
756 }
757}
758
759// RealReg and VirtualReg are merely wrappers around Reg, which try to
760// dynamically ensure that they are really wrapping the correct flavour of
761// register.
762
763#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
764#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
765pub struct RealReg {
766 reg: Reg,
767}
768impl Reg /* !!not RealReg!! */ {
769 pub fn to_real_reg(self) -> RealReg {
770 if self.is_virtual() {
771 panic!("Reg::to_real_reg: this is a virtual register")
772 } else {
773 RealReg { reg: self }
774 }
775 }
776}
777impl RealReg {
778 pub fn get_class(self) -> RegClass {
779 self.reg.get_class()
780 }
781 #[inline(always)]
782 pub fn get_index(self) -> usize {
783 self.reg.get_index()
784 }
785 pub fn get_hw_encoding(self) -> usize {
786 self.reg.get_hw_encoding() as usize
787 }
788 #[inline(always)]
789 pub fn to_reg(self) -> Reg {
790 self.reg
791 }
792 pub fn invalid() -> RealReg {
793 RealReg {
794 reg: Reg::invalid(),
795 }
796 }
797 pub fn is_valid(self) -> bool {
798 self.reg.is_valid()
799 }
800 pub fn is_invalid(self) -> bool {
801 self.reg.is_invalid()
802 }
803 pub fn maybe_valid(self) -> Option<RealReg> {
804 if self == RealReg::invalid() {
805 None
806 } else {
807 Some(self)
808 }
809 }
810}
811impl fmt::Debug for RealReg {
812 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
813 write!(fmt, "{:?}", self.reg)
814 }
815}
816
817#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
818#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
819pub struct VirtualReg {
820 reg: Reg,
821}
822impl Reg /* !!not VirtualReg!! */ {
823 #[inline(always)]
824 pub fn to_virtual_reg(self) -> VirtualReg {
825 if self.is_virtual() {
826 VirtualReg { reg: self }
827 } else {
828 panic!("Reg::to_virtual_reg: this is a real register")
829 }
830 }
831}
832impl VirtualReg {
833 pub fn get_class(self) -> RegClass {
834 self.reg.get_class()
835 }
836 #[inline(always)]
837 pub fn get_index(self) -> usize {
838 self.reg.get_index()
839 }
840 #[inline(always)]
841 pub fn to_reg(self) -> Reg {
842 self.reg
843 }
844 pub fn invalid() -> VirtualReg {
845 VirtualReg {
846 reg: Reg::invalid(),
847 }
848 }
849 pub fn is_valid(self) -> bool {
850 self.reg.is_valid()
851 }
852 pub fn is_invalid(self) -> bool {
853 self.reg.is_invalid()
854 }
855 pub fn maybe_valid(self) -> Option<VirtualReg> {
856 if self == VirtualReg::invalid() {
857 None
858 } else {
859 Some(self)
860 }
861 }
862}
863impl fmt::Debug for VirtualReg {
864 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
865 write!(fmt, "{:?}", self.reg)
866 }
867}
868
869impl Reg {
870 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
871 /// a read-role.
872 pub fn apply_uses<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
873 self.apply(|vreg| mapper.get_use(vreg));
874 }
875
876 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
877 /// a write-role.
878 pub fn apply_defs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
879 self.apply(|vreg| mapper.get_def(vreg));
880 }
881
882 /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
883 /// a modify-role.
884 pub fn apply_mods<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
885 self.apply(|vreg| mapper.get_mod(vreg));
886 }
887
888 fn apply<F: Fn(VirtualReg) -> Option<RealReg>>(&mut self, f: F) {
889 if let Some(vreg) = self.as_virtual_reg() {
890 if let Some(rreg) = f(vreg) {
891 debug_assert!(rreg.get_class() == vreg.get_class());
892 *self = rreg.to_reg();
893 } else {
894 panic!("Reg::apply: no mapping for {:?}", self);
895 }
896 }
897 }
898}
899
900/// A "writable register". This is a zero-cost wrapper that can be used to
901/// create a distinction, at the Rust type level, between a plain "register"
902/// and a "writable register".
903///
904/// Writable<..> can be used by the client to ensure that, internally, it only
905/// generates instructions that write to registers that should be written. The
/// `RegUsageCollector` below, through which every instruction reports its
/// register uses, requires a `Writable<Reg>` (not just `Reg`) for the
/// registers it records as defined and modified. While we cannot hide the constructor for `Writable<..>`
909/// from certain parts of the client while exposing it to others, the client
910/// *can* adopt conventions to e.g. only ever call the Writable<..>
911/// constructor from its central vreg-management logic, and decide that any
912/// invocation of this constructor in a machine backend (for example) is an
913/// error.
914#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
915#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
916pub struct Writable<R> {
917 reg: R,
918}
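// A sketch of the intended discipline: the central vreg-management logic is
// the only place that creates the wrapper ..
//
//    let w: Writable<Reg> = Writable::from_reg(Reg::new_virtual(RegClass::I64, 42));
//
// .. and everything downstream only ever extracts the plain, read-only view
// via `w.to_reg()`.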
919
920/// Set of requirements for types that can be wrapped in Writable.
921pub trait WritableBase:
922 Copy + Clone + PartialEq + Eq + Hash + PartialOrd + Ord + fmt::Debug
923{
924}
925
926impl<T> WritableBase for T where
927 T: Copy + Clone + PartialEq + Eq + Hash + PartialOrd + Ord + fmt::Debug
928{
929}
930
931impl<R> Writable<R> {
932 /// Create a Writable<R> from an R. The client should carefully audit where
933 /// it calls this constructor to ensure correctness (see `Writable<..>`
934 /// struct documentation).
935 #[inline(always)]
936 pub const fn from_reg(reg: R) -> Writable<R> {
937 Writable { reg }
938 }
939}
940
941impl<R: WritableBase> Writable<R> {
942 /// Get the inner Reg.
943 pub fn to_reg(&self) -> R {
944 self.reg
945 }
946
947 pub fn map<F, U>(&self, f: F) -> Writable<U>
948 where
949 F: Fn(R) -> U,
950 U: WritableBase,
951 {
952 Writable { reg: f(self.reg) }
953 }
954}
955
956#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
957pub struct SpillSlot(u32);
958
959impl SpillSlot {
960 #[inline(always)]
961 pub fn new(n: u32) -> Self {
962 Self(n)
963 }
964 #[inline(always)]
965 pub fn get(self) -> u32 {
966 self.0
967 }
968 #[inline(always)]
969 pub fn get_usize(self) -> usize {
970 self.get() as usize
971 }
972 pub fn round_up(self, num_slots: u32) -> SpillSlot {
973 assert!(num_slots > 0);
974 SpillSlot::new((self.get() + num_slots - 1) / num_slots * num_slots)
975 }
976 pub fn inc(self, num_slots: u32) -> SpillSlot {
977 SpillSlot::new(self.get() + num_slots)
978 }
979}
980
981impl fmt::Debug for SpillSlot {
982 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
983 write!(fmt, "S{}", self.get())
984 }
985}
986
987//=============================================================================
988// Register uses: low level interface
989
990// This minimal struct is visible from outside the regalloc.rs interface. It
991// is intended to be a safe wrapper around `RegVecs`, which isn't externally
992// visible. It is used to collect unsanitized reg use info from client
993// instructions.
994pub struct RegUsageCollector<'a> {
995 pub reg_vecs: &'a mut RegVecs,
996}
997
998impl<'a> RegUsageCollector<'a> {
999 pub fn new(reg_vecs: &'a mut RegVecs) -> Self {
1000 Self { reg_vecs }
1001 }
1002 pub fn add_use(&mut self, r: Reg) {
1003 self.reg_vecs.uses.push(r);
1004 }
1005 pub fn add_uses(&mut self, regs: &[Reg]) {
1006 self.reg_vecs.uses.extend(regs.iter());
1007 }
1008 pub fn add_def(&mut self, r: Writable<Reg>) {
1009 self.reg_vecs.defs.push(r.to_reg());
1010 }
1011 pub fn add_defs(&mut self, regs: &[Writable<Reg>]) {
1012 self.reg_vecs.defs.reserve(regs.len());
1013 for r in regs {
1014 self.reg_vecs.defs.push(r.to_reg());
1015 }
1016 }
1017 pub fn add_mod(&mut self, r: Writable<Reg>) {
1018 self.reg_vecs.mods.push(r.to_reg());
1019 }
1020 pub fn add_mods(&mut self, regs: &[Writable<Reg>]) {
1021 self.reg_vecs.mods.reserve(regs.len());
1022 for r in regs {
1023 self.reg_vecs.mods.push(r.to_reg());
1024 }
1025 }
1026
1027 // The presence of the following two is a hack, needed to support fuzzing
1028 // in the test framework. Real clients should not call them.
1029 pub fn get_use_def_mod_vecs_test_framework_only(&self) -> (Vec<Reg>, Vec<Reg>, Vec<Reg>) {
1030 (
1031 self.reg_vecs.uses.clone(),
1032 self.reg_vecs.defs.clone(),
1033 self.reg_vecs.mods.clone(),
1034 )
1035 }
1036
1037 pub fn get_empty_reg_vecs_test_framework_only(sanitized: bool) -> RegVecs {
1038 RegVecs::new(sanitized)
1039 }
1040}
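// A short sketch of how a client's reg-use reporting might drive the
// collector (`v1`, `v2` and `v3` stand for whatever registers the
// instruction mentions):
//
//    collector.add_use(v1);                        // v1 is read
//    collector.add_def(Writable::from_reg(v2));    // v2 is written
//    collector.add_mod(Writable::from_reg(v3));    // v3 is read and written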
1041
1042// Everything else is not visible outside the regalloc.rs interface.
1043
1044// There is one of these per function. Note that `defs` and `mods` lose the
1045// `Writable` constraint at this point. This is for convenience of having all
// three vectors be the same type, but at the cost of no longer being able to
// distinguish read-only from read/write registers in the Rust type system.
1049#[derive(Debug)]
1050pub struct RegVecs {
1051 pub uses: Vec<Reg>,
1052 pub defs: Vec<Reg>,
1053 pub mods: Vec<Reg>,
1054 sanitized: bool,
1055}
1056
1057impl RegVecs {
1058 pub fn new(sanitized: bool) -> Self {
1059 Self {
1060 uses: vec![],
1061 defs: vec![],
1062 mods: vec![],
1063 sanitized,
1064 }
1065 }
1066 pub fn is_sanitized(&self) -> bool {
1067 self.sanitized
1068 }
1069 pub fn set_sanitized(&mut self, sanitized: bool) {
1070 self.sanitized = sanitized;
1071 }
1072 pub fn clear(&mut self) {
1073 self.uses.clear();
1074 self.defs.clear();
1075 self.mods.clear();
1076 }
1077}
1078
1079// There is one of these per insn, so try and keep it as compact as possible.
1080// I think this should fit in 16 bytes.
1081#[derive(Clone, Debug)]
1082pub struct RegVecBounds {
1083 // These are the group start indices in RegVecs.{uses, defs, mods}.
1084 pub uses_start: u32,
1085 pub defs_start: u32,
1086 pub mods_start: u32,
// And these are the group lengths. Storing them as `u8` limits each
// instruction to mentioning at most 255 registers in any group, but that
// does not seem like a problem.
1090 pub uses_len: u8,
1091 pub defs_len: u8,
1092 pub mods_len: u8,
1093}
1094
1095impl RegVecBounds {
1096 pub fn new() -> Self {
1097 Self {
1098 uses_start: 0,
1099 defs_start: 0,
1100 mods_start: 0,
1101 uses_len: 0,
1102 defs_len: 0,
1103 mods_len: 0,
1104 }
1105 }
1106}
1107
1108// This is the primary structure. We compute just one of these for an entire
1109// function.
1110pub struct RegVecsAndBounds {
1111 // The three vectors of registers. These can be arbitrarily long.
1112 pub vecs: RegVecs,
1113 // Admin info which tells us the location, for each insn, of its register
1114 // groups in `vecs`.
1115 pub bounds: TypedIxVec<InstIx, RegVecBounds>,
1116}
1117
1118impl RegVecsAndBounds {
1119 pub fn new(vecs: RegVecs, bounds: TypedIxVec<InstIx, RegVecBounds>) -> Self {
1120 Self { vecs, bounds }
1121 }
1122 pub fn is_sanitized(&self) -> bool {
1123 self.vecs.sanitized
1124 }
1125 #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is currently unused.
1126 pub fn num_insns(&self) -> u32 {
1127 self.bounds.len()
1128 }
1129}
1130
1131//=============================================================================
1132// Register uses: convenience interface
1133
1134// Some call sites want to get reg use information as three Sets. This is a
1135// "convenience facility" which is easier to use but much slower than working
1136// with a whole-function `RegVecsAndBounds`. It shouldn't be used on critical
1137// paths.
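// A sketch of typical use, where `rvb` is a whole-function `RegVecsAndBounds`:
//
//    let sets: RegSets = rvb.get_reg_sets_for_iix(InstIx::new(3));
//    for r in sets.uses.iter() { /* each Reg read by instruction 3 */ }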
1138#[derive(Debug)]
1139pub struct RegSets {
1140 pub uses: Set<Reg>, // registers that are read.
1141 pub defs: Set<Reg>, // registers that are written.
1142 pub mods: Set<Reg>, // registers that are modified.
1143 sanitized: bool,
1144}
1145
1146impl RegSets {
1147 pub fn new(sanitized: bool) -> Self {
1148 Self {
1149 uses: Set::<Reg>::empty(),
1150 defs: Set::<Reg>::empty(),
1151 mods: Set::<Reg>::empty(),
1152 sanitized,
1153 }
1154 }
1155
1156 pub fn is_sanitized(&self) -> bool {
1157 self.sanitized
1158 }
1159}
1160
1161impl RegVecsAndBounds {
1162 /* !!not RegSets!! */
1163 #[inline(never)]
1164 // Convenience function. Try to avoid using this.
1165 pub fn get_reg_sets_for_iix(&self, iix: InstIx) -> RegSets {
1166 let bounds = &self.bounds[iix];
1167 let mut regsets = RegSets::new(self.vecs.sanitized);
1168 for i in bounds.uses_start as usize..bounds.uses_start as usize + bounds.uses_len as usize {
1169 regsets.uses.insert(self.vecs.uses[i]);
1170 }
1171 for i in bounds.defs_start as usize..bounds.defs_start as usize + bounds.defs_len as usize {
1172 regsets.defs.insert(self.vecs.defs[i]);
1173 }
1174 for i in bounds.mods_start as usize..bounds.mods_start as usize + bounds.mods_len as usize {
1175 regsets.mods.insert(self.vecs.mods[i]);
1176 }
1177 regsets
1178 }
1179}
1180
1181//=============================================================================
1182// Definitions of the "real register universe".
1183
1184// A "Real Register Universe" is a read-only structure that contains all
1185// information about real registers on a given host. It serves several
1186// purposes:
1187//
1188// * defines the mapping from real register indices to the registers
1189// themselves
1190//
1191// * defines the size of the initial section of that mapping that is available
1192// to the register allocator for use, so that it can treat the registers
1193// under its control as a zero based, contiguous array. This is important
1194// for its efficiency.
1195//
1196// * gives meaning to Set<RealReg>, which otherwise would merely be a bunch of
1197// bits.
1198
1199#[derive(Clone, Debug)]
1200#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
1201pub struct RealRegUniverse {
1202 // The registers themselves. All must be real registers, and all must
1203 // have their index number (.get_index()) equal to the array index here,
1204 // since this is the only place where we map index numbers to actual
1205 // registers.
1206 pub regs: Vec<(RealReg, String)>,
1207
1208 // This is the size of the initial section of `regs` that is available to
1209 // the allocator. It must be <= `regs`.len().
1210 pub allocable: usize,
1211
1212 // Information about groups of allocable registers. Used to quickly address
1213 // only a group of allocable registers belonging to the same register class.
1214 // Indexes into `allocable_by_class` are RegClass values, such as
1215 // RegClass::F32. If the resulting entry is `None` then there are no
1216 // registers in that class. Otherwise the value is a `RegClassInfo`, which
1217 // provides a register range and possibly information about fixed uses.
1218 pub allocable_by_class: [Option<RegClassInfo>; NUM_REG_CLASSES],
1219}
1220
1221/// Information about a single register class in the `RealRegUniverse`.
1222#[derive(Clone, Copy, Debug)]
1223#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
1224pub struct RegClassInfo {
1225 // Range of allocatable registers in this register class, in terms of
1226 // register indices.
1227 //
1228 // A range (first, last) specifies the range of entries in
1229 // `RealRegUniverse.regs` corresponding to that class. The range includes
1230 // both `first` and `last`.
1231 //
1232 // In all cases, `last` must be < `RealRegUniverse.allocable`. In other
1233 // words, all ranges together in `allocable_by_class` must describe only the
1234 // allocable prefix of `regs`.
1235 //
1236 // For example, let's say
1237 // allocable_by_class[RegClass::F32] ==
1238 // Some(RegClassInfo { first: 10, last: 14, .. })
1239 // Then regs[10], regs[11], regs[12], regs[13], and regs[14] give all
1240 // registers of register class RegClass::F32.
1241 //
1242 // The effect of the above is that registers in `regs` must form
1243 // contiguous groups. This is checked by RealRegUniverse::check_is_sane().
1244 pub first: usize,
1245 pub last: usize,
1246
1247 // A register, if any, that is *guaranteed* not to be used as a fixed use
1248 // in any code, and so that the register allocator can statically reserve
1249 // for its own use as a temporary. Some register allocators may need such
1250 // a register for various maneuvers, for example a spillslot-to-spillslot
1251 // move when no (other) registers are free.
1252 pub suggested_scratch: Option<usize>,
1253}
1254
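// A minimal sketch of building and checking a universe with two allocable I64
// registers (the names and encodings here are purely illustrative):
//
//    let r0 = Reg::new_real(RegClass::I64, /*enc=*/0, /*index=*/0).to_real_reg();
//    let r1 = Reg::new_real(RegClass::I64, /*enc=*/1, /*index=*/1).to_real_reg();
//    let mut allocable_by_class = [None; NUM_REG_CLASSES];
//    allocable_by_class[RegClass::I64.rc_to_usize()] =
//        Some(RegClassInfo { first: 0, last: 1, suggested_scratch: None });
//    let univ = RealRegUniverse {
//        regs: vec![(r0, "r0".to_string()), (r1, "r1".to_string())],
//        allocable: 2,
//        allocable_by_class,
//    };
//    univ.check_is_sane();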
1255impl RealRegUniverse {
1256 /// Show it in a pretty way.
1257 pub fn show(&self) -> Vec<String> {
1258 let mut res = vec![];
1259 // Show the allocables
1260 for class_num in 0..NUM_REG_CLASSES {
1261 let class_info = match &self.allocable_by_class[class_num] {
1262 None => continue,
1263 Some(info) => info,
1264 };
1265 let class = RegClass::rc_from_u32(class_num as u32);
1266 let mut class_str = "class ".to_string()
1267 + &class.long_name().to_string()
1268 + &"(".to_string()
1269 + &class.short_name().to_string()
1270 + &") at ".to_string();
1271 class_str = class_str + &format!("[{} .. {}]: ", class_info.first, class_info.last);
1272 for ix in class_info.first..=class_info.last {
1273 class_str = class_str + &self.regs[ix].1;
1274 if let Some(suggested_ix) = class_info.suggested_scratch {
1275 if ix == suggested_ix {
1276 class_str = class_str + "*";
1277 }
1278 }
1279 class_str = class_str + " ";
1280 }
1281 res.push(class_str);
1282 }
1283 // And the non-allocables
1284 if self.allocable < self.regs.len() {
1285 let mut stragglers = format!(
1286 "not allocable at [{} .. {}]: ",
1287 self.allocable,
1288 self.regs.len() - 1
1289 );
1290 for ix in self.allocable..self.regs.len() {
1291 stragglers = stragglers + &self.regs[ix].1 + &" ".to_string();
1292 }
1293 res.push(stragglers);
1294 }
1295 res
1296 }
1297
1298 /// Check that the given universe satisfies various invariants, and panic
1299 /// if not. All the invariants are important.
1300 pub fn check_is_sane(&self) {
1301 let regs_len = self.regs.len();
1302 let regs_allocable = self.allocable;
1303 // The universe must contain at most 256 registers. That's because
1304 // `Reg` only has an 8-bit index value field, so if the universe
1305 // contained more than 256 registers, we'd never be able to index into
1306 // entries 256 and above. This is no limitation in practice since all
1307 // targets we're interested in contain (many) fewer than 256 regs in
1308 // total.
1309 let mut ok = regs_len <= 256;
1310 // The number of allocable registers must not exceed the number of
1311 // `regs` presented. In general it will be less, since the universe
1312 // will list some registers (stack pointer, etc) which are not
1313 // available for allocation.
1314 if ok {
1315 ok = regs_allocable <= regs_len;
1316 }
1317 // All registers must have an index value which points back at the
1318 // `regs` slot they are in. Also they really must be real regs.
1319 if ok {
1320 for i in 0..regs_len {
1321 let (reg, _name) = &self.regs[i];
1322 if ok && (reg.to_reg().is_virtual() || reg.get_index() != i) {
1323 ok = false;
1324 }
1325 }
1326 }
// The allocatable regclass groupings defined by `allocable_by_class` must
// be contiguous.
1329 if ok {
1330 let mut regclass_used = [false; NUM_REG_CLASSES];
1331 for rc in 0..NUM_REG_CLASSES {
1332 regclass_used[rc] = false;
1333 }
1334 for i in 0..regs_allocable {
1335 let (reg, _name) = &self.regs[i];
1336 let rc = reg.get_class().rc_to_u32() as usize;
1337 regclass_used[rc] = true;
1338 }
1339 // Scan forward through each grouping, checking that the listed
1340 // registers really are of the claimed class. Also count the
1341 // total number visited. This seems a fairly reliable way to
1342 // ensure that the groupings cover all allocated registers exactly
1343 // once, and that all classes are contiguous groups.
1344 let mut regs_visited = 0;
1345 for rc in 0..NUM_REG_CLASSES {
1346 match &self.allocable_by_class[rc] {
1347 &None => {
1348 if regclass_used[rc] {
1349 ok = false;
1350 }
1351 }
1352 &Some(RegClassInfo {
1353 first,
1354 last,
1355 suggested_scratch,
1356 }) => {
1357 if !regclass_used[rc] {
1358 ok = false;
1359 }
1360 if ok {
1361 for i in first..last + 1 {
1362 let (reg, _name) = &self.regs[i];
1363 if ok && RegClass::rc_from_u32(rc as u32) != reg.get_class() {
1364 ok = false;
1365 }
1366 regs_visited += 1;
1367 }
1368 }
1369 if ok {
1370 if let Some(s) = suggested_scratch {
1371 if s < first || s > last {
1372 ok = false;
1373 }
1374 }
1375 }
1376 }
1377 }
1378 }
1379 if ok && regs_visited != regs_allocable {
1380 ok = false;
1381 }
1382 }
1383 // So finally ..
1384 if !ok {
1385 panic!("RealRegUniverse::check_is_sane: invalid RealRegUniverse");
1386 }
1387 }
1388}
1389
1390//=============================================================================
1391// Representing and printing of live range fragments.
1392
1393#[derive(Copy, Clone, Hash, PartialEq, Eq, Ord)]
1394// There are four "points" within an instruction that are of interest, and
1395// these have a total ordering: R < U < D < S. They are:
1396//
1397// * R(eload): this is where any reload insns for the insn itself are
1398// considered to live.
1399//
1400// * U(se): this is where the insn is considered to use values from those of
1401// its register operands that appear in a Read or Modify role.
1402//
1403// * D(ef): this is where the insn is considered to define new values for
1404// those of its register operands that appear in a Write or Modify role.
1405//
1406// * S(pill): this is where any spill insns for the insn itself are considered
1407// to live.
1408//
1409// Instructions in the incoming Func may only exist at the U and D points,
1410// and so their associated live range fragments will only mention the U and D
1411// points. However, when adding spill code, we need a way to represent live
1412// ranges involving the added spill and reload insns, in which case R and S
1413// come into play:
1414//
1415// * A reload for instruction i is considered to be live from i.R to i.U.
1416//
1417// * A spill for instruction i is considered to be live from i.D to i.S.
1418
1419pub enum Point {
1420 // The values here are important. Don't change them.
1421 Reload = 0,
1422 Use = 1,
1423 Def = 2,
1424 Spill = 3,
1425}
1426
1427impl Point {
1428 #[inline(always)]
1429 pub fn is_reload(self) -> bool {
1430 match self {
1431 Point::Reload => true,
1432 _ => false,
1433 }
1434 }
1435 #[inline(always)]
1436 pub fn is_use(self) -> bool {
1437 match self {
1438 Point::Use => true,
1439 _ => false,
1440 }
1441 }
1442 #[inline(always)]
1443 pub fn is_def(self) -> bool {
1444 match self {
1445 Point::Def => true,
1446 _ => false,
1447 }
1448 }
1449 #[inline(always)]
1450 pub fn is_spill(self) -> bool {
1451 match self {
1452 Point::Spill => true,
1453 _ => false,
1454 }
1455 }
1456 #[inline(always)]
1457 pub fn is_use_or_def(self) -> bool {
1458 self.is_use() || self.is_def()
1459 }
1460}
1461
1462impl PartialOrd for Point {
1463 // In short .. R < U < D < S. This is probably what would be #derive'd
1464 // anyway, but we need to be sure.
1465 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1466 (*self as u32).partial_cmp(&(*other as u32))
1467 }
1468}
1469
1470// See comments below on `RangeFrag` for the meaning of `InstPoint`.
1471#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1472pub struct InstPoint {
1473 /// This is conceptually:
1474 /// pub iix: InstIx,
1475 /// pub pt: Point,
1476 ///
1477 /// but packed into a single 32 bit word, so as
1478 /// (1) to ensure it is only 32 bits (and hence to guarantee that `RangeFrag`
1479 /// is 64 bits), and
1480 /// (2) to make it possible to implement `PartialOrd` using `PartialOrd`
1481 /// directly on 32 bit words (and hence we let it be derived).
1482 ///
1483 /// This has the format:
1484 /// InstIx as bits 31:2, Point as bits 1:0.
1485 ///
1486 /// It does give the slight limitation that all InstIxs must be < 2^30, but
1487 /// that's hardly a big deal: the analysis module rejects any input with 2^24
1488 /// or more Insns.
1489 ///
1490 /// Do not access this directly:
1491 bits: u32,
1492}
1493
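// A worked example of the packing above: InstPoint::new(InstIx::new(10), Point::Def)
// is stored as (10 << 2) | 2 == 42, while InstPoint::new_use(InstIx::new(10)) is
// (10 << 2) | 1 == 41, so the derived ordering correctly places the Use point of an
// instruction just before its Def point.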
1494impl InstPoint {
1495 #[inline(always)]
1496 pub fn new(iix: InstIx, pt: Point) -> Self {
1497 let iix_n = iix.get();
1498 assert!(iix_n < 0x4000_0000u32);
1499 let pt_n = pt as u32;
1500 InstPoint {
1501 bits: (iix_n << 2) | pt_n,
1502 }
1503 }
1504 #[inline(always)]
1505 pub fn iix(self) -> InstIx {
1506 InstIx::new(self.bits >> 2)
1507 }
1508 #[inline(always)]
1509 pub fn pt(self) -> Point {
1510 match self.bits & 3 {
1511 0 => Point::Reload,
1512 1 => Point::Use,
1513 2 => Point::Def,
1514 3 => Point::Spill,
1515 // This can never happen, but rustc doesn't seem to know that.
1516 _ => panic!("InstPt::pt: unreachable case"),
1517 }
1518 }
1519 #[inline(always)]
1520 pub fn set_iix(&mut self, iix: InstIx) {
1521 let iix_n = iix.get();
1522 assert!(iix_n < 0x4000_0000u32);
1523 self.bits = (iix_n << 2) | (self.bits & 3);
1524 }
1525 #[inline(always)]
1526 pub fn set_pt(&mut self, pt: Point) {
1527 self.bits = (self.bits & 0xFFFF_FFFCu32) | pt as u32;
1528 }
1529 #[inline(always)]
1530 pub fn new_reload(iix: InstIx) -> Self {
1531 InstPoint::new(iix, Point::Reload)
1532 }
1533 #[inline(always)]
1534 pub fn new_use(iix: InstIx) -> Self {
1535 InstPoint::new(iix, Point::Use)
1536 }
1537 #[inline(always)]
1538 pub fn new_def(iix: InstIx) -> Self {
1539 InstPoint::new(iix, Point::Def)
1540 }
1541 #[inline(always)]
1542 pub fn new_spill(iix: InstIx) -> Self {
1543 InstPoint::new(iix, Point::Spill)
1544 }
1545 #[inline(always)]
1546 pub fn invalid_value() -> Self {
1547 Self {
1548 bits: 0xFFFF_FFFFu32,
1549 }
1550 }
1551 #[inline(always)]
1552 pub fn max_value() -> Self {
1553 Self {
1554 bits: 0xFFFF_FFFFu32,
1555 }
1556 }
1557 #[inline(always)]
1558 pub fn min_value() -> Self {
1559 Self { bits: 0u32 }
1560 }
1561}
1562
1563impl fmt::Debug for InstPoint {
1564 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1565 write!(
1566 fmt,
1567 "{:?}{}",
1568 self.iix(),
1569 match self.pt() {
1570 Point::Reload => ".r",
1571 Point::Use => ".u",
1572 Point::Def => ".d",
1573 Point::Spill => ".s",
1574 }
1575 )
1576 }
1577}
1578
1579//=============================================================================
1580// Live Range Fragments, and their metrics
1581
1582// A Live Range Fragment (RangeFrag) describes a consecutive sequence of one or
1583// more instructions, in which a Reg is "live". The sequence must exist
1584// entirely inside only one basic block.
1585//
1586// However, merely indicating the start and end instruction numbers isn't
1587// enough: we must also include a "Use or Def" indication. These indicate two
1588// different "points" within each instruction: the Use position, where
1589// incoming registers are read, and the Def position, where outgoing registers
1590// are written. The Use position is considered to come before the Def
1591// position, as described for `Point` above.
1592//
1593// When we come to generate spill/restore live ranges, Point::S and Point::R
1594// also come into play. Live ranges (and hence, RangeFrags) that do not perform
1595// spills or restores should not use either of Point::S or Point::R.
1596//
1597// The set of positions denoted by
1598//
1599// {0 .. #insns-1} x {Reload point, Use point, Def point, Spill point}
1600//
1601// is exactly the set of positions that we need to keep track of when mapping
// live ranges to registers. This is the reason for the type InstPoint. Note
1603// that InstPoint values have a total ordering, at least within a single basic
1604// block: the insn number is used as the primary key, and the Point part is
1605// the secondary key, with Reload < Use < Def < Spill.
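// For example, a reload generated for instruction i10 gives rise to a frag
// covering [i10.r .. i10.u], which (assuming `iix10` names that instruction's
// InstIx) could be built as:
//
//    let frag = RangeFrag::new(InstPoint::new_reload(iix10),
//                              InstPoint::new_use(iix10));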
1606#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1607pub struct RangeFrag {
1608 pub first: InstPoint,
1609 pub last: InstPoint,
1610}
1611
1612impl fmt::Debug for RangeFrag {
1613 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1614 write!(fmt, "(RF: {:?}-{:?})", self.first, self.last)
1615 }
1616}
1617
1618impl RangeFrag {
1619 #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is unused.
1620 pub fn new(first: InstPoint, last: InstPoint) -> Self {
1621 debug_assert!(first <= last);
1622 RangeFrag { first, last }
1623 }
1624
1625 pub fn invalid_value() -> Self {
1626 Self {
1627 first: InstPoint::invalid_value(),
1628 last: InstPoint::invalid_value(),
1629 }
1630 }
1631
1632 pub fn new_with_metrics<F: Function>(
1633 f: &F,
1634 bix: BlockIx,
1635 first: InstPoint,
1636 last: InstPoint,
1637 count: u16,
1638 ) -> (Self, RangeFragMetrics) {
1639 debug_assert!(f.block_insns(bix).len() >= 1);
1640 debug_assert!(f.block_insns(bix).contains(first.iix()));
1641 debug_assert!(f.block_insns(bix).contains(last.iix()));
1642 debug_assert!(first <= last);
1643 if first == last {
1644 debug_assert!(count == 1);
1645 }
1646 let first_iix_in_block = f.block_insns(bix).first();
1647 let last_iix_in_block = f.block_insns(bix).last();
1648 let first_pt_in_block = InstPoint::new_use(first_iix_in_block);
1649 let last_pt_in_block = InstPoint::new_def(last_iix_in_block);
1650 let kind = match (first == first_pt_in_block, last == last_pt_in_block) {
1651 (false, false) => RangeFragKind::Local,
1652 (false, true) => RangeFragKind::LiveOut,
1653 (true, false) => RangeFragKind::LiveIn,
1654 (true, true) => RangeFragKind::Thru,
1655 };
1656 (
1657 RangeFrag { first, last },
1658 RangeFragMetrics { bix, kind, count },
1659 )
1660 }
1661}
1662
1663// Comparison of RangeFrags. They form a partial order.
1664
1665pub fn cmp_range_frags(f1: &RangeFrag, f2: &RangeFrag) -> Option<Ordering> {
1666 if f1.last < f2.first {
1667 return Some(Ordering::Less);
1668 }
1669 if f1.first > f2.last {
1670 return Some(Ordering::Greater);
1671 }
1672 if f1.first == f2.first && f1.last == f2.last {
1673 return Some(Ordering::Equal);
1674 }
1675 None
1676}
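// For example, a frag covering [i2.u .. i3.d] compares Less than one covering
// [i5.u .. i6.d] and Greater than one covering [i0.u .. i1.d], while two
// overlapping frags such as [i2.u .. i5.d] and [i4.u .. i6.d] compare as None.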
1677
1678impl RangeFrag {
1679 pub fn contains(&self, ipt: &InstPoint) -> bool {
1680 self.first <= *ipt && *ipt <= self.last
1681 }
1682}
1683
1684// A handy summary hint for a RangeFrag. Note that none of these are correct
1685// if the RangeFrag has been extended so as to cover multiple basic blocks.
1686// But that ("RangeFrag compression") is something done locally within each
1687// algorithm (BT and LSRA). The analysis-phase output will not include any
1688// such compressed RangeFrags.
1689#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
1690pub enum RangeFragKind {
1691 Local, // Fragment exists entirely inside one block
1692 LiveIn, // Fragment is live in to a block, but ends inside it
1693 LiveOut, // Fragment is live out of a block, but starts inside it
1694 Thru, // Fragment is live through the block (starts and ends outside it)
1695}
1696
1697impl fmt::Debug for RangeFragKind {
1698 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1699 match self {
1700 RangeFragKind::Local => write!(fmt, "Local"),
1701 RangeFragKind::LiveIn => write!(fmt, "LiveIn"),
1702 RangeFragKind::LiveOut => write!(fmt, "LiveOut"),
1703 RangeFragKind::Thru => write!(fmt, "Thru"),
1704 }
1705 }
1706}
1707
1708// `RangeFrags` resulting from the initial analysis phase (analysis_data_flow.rs)
1709// exist only within single basic blocks, and therefore have some associated
1710// metrics, held by `RangeFragMetrics`:
1711//
1712// * a `count` field, which is a u16 indicating how often the associated storage
1713// unit (Reg) is mentioned inside the RangeFrag. It is assumed that the RangeFrag
1714// is associated with some Reg. If not, the `count` field is meaningless. This
1715// field has no effect on the correctness of the resulting allocation. It is used
1716// however in the estimation of `VirtualRange` spill costs, which are important
1717// for prioritising which `VirtualRange`s get assigned a register vs which have
1718// to be spilled.
1719//
1720// * `bix` field, which indicates which `Block` the fragment exists in. This
1721// field is actually redundant, since the containing `Block` can be inferred,
// laboriously, from the associated `RangeFrag`'s `first` and `last` fields,
1723// providing you have an `InstIxToBlockIx` mapping table to hand. It is included
1724// here for convenience.
1725//
1726// * `kind` is another convenience field, indicating how the range is included
1727// within its owning block.
1728//
1729// The analysis phase (fn `deref_and_compress_sorted_range_frag_ixs`)
1730// compresses ranges and as a result breaks the invariant that a `RangeFrag`
1731// exists only within a single `Block`. For a `RangeFrag` spanning multiple
1732// `Block`s, all three `RangeFragMetric` fields are meaningless. This is the
1733// reason for separating `RangeFrag` and `RangeFragMetrics` -- so that it is
1734// possible to merge `RangeFrag`s without being forced to create fake values
1735// for the metrics fields.
1736#[derive(Clone, PartialEq)]
1737pub struct RangeFragMetrics {
1738 pub bix: BlockIx,
1739 pub kind: RangeFragKind,
1740 pub count: u16,
1741}
1742
1743impl fmt::Debug for RangeFragMetrics {
1744 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1745 write!(
1746 fmt,
1747 "(RFM: {:?}, count={}, {:?})",
1748 self.kind, self.count, self.bix
1749 )
1750 }
1751}
1752
1753//=============================================================================
1754// Vectors of RangeFragIxs, sorted so that the associated RangeFrags are in
1755// ascending order, per their InstPoint fields. The associated RangeFrags may
1756// not overlap.
1757//
1758// The "fragment environment" (usually called "frag_env"), to which the
1759// RangeFragIxs refer, is not stored here.
1760
1761#[derive(Clone)]
1762pub(crate) struct SortedRangeFragIxs(SmallVec<[RangeFragIx; 4]>);
1763
1764impl fmt::Debug for SortedRangeFragIxs {
1765 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1766 self.0.fmt(fmt)
1767 }
1768}
1769
1770impl Deref for SortedRangeFragIxs {
1771 type Target = SmallVec<[RangeFragIx; 4]>;
1772 #[inline(always)]
1773 fn deref(&self) -> &Self::Target {
1774 &self.0
1775 }
1776}
1777
1778impl SortedRangeFragIxs {
1779 fn check(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
1780 for i in 1..self.0.len() {
1781 let prev_frag = &fenv[self.0[i - 1]];
1782 let this_frag = &fenv[self.0[i]];
1783 if cmp_range_frags(prev_frag, this_frag) != Some(Ordering::Less) {
1784 panic!("SortedRangeFragIxs::check: vector not ok");
1785 }
1786 }
1787 }
1788
1789 fn sort(&mut self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
1790 self.0.sort_unstable_by(|fix_a, fix_b| {
1791 match cmp_range_frags(&fenv[*fix_a], &fenv[*fix_b]) {
1792 Some(Ordering::Less) => Ordering::Less,
1793 Some(Ordering::Greater) => Ordering::Greater,
1794 Some(Ordering::Equal) | None => {
1795 panic!("SortedRangeFragIxs::sort: overlapping Frags!")
1796 }
1797 }
1798 });
1799 }
1800
1801 pub(crate) fn new(
1802 frag_ixs: SmallVec<[RangeFragIx; 4]>,
1803 fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
1804 ) -> Self {
1805 let mut res = Self(frag_ixs);
// Sort the fragment indices, then check they are in order and non-overlapping.
1807 res.sort(fenv);
1808 res.check(fenv);
1809 res
1810 }
1811
1812 pub(crate) fn unit(fix: RangeFragIx, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) -> Self {
1813 let mut res = Self(SmallVec::<[RangeFragIx; 4]>::new());
1814 res.0.push(fix);
1815 res.check(fenv);
1816 res
1817 }
1818
1819 /// Does this sorted list of range fragments contain the given instruction point?
1820 pub(crate) fn contains_pt(
1821 &self,
1822 fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
1823 pt: InstPoint,
1824 ) -> bool {
1825 self.0
1826 .binary_search_by(|&ix| {
1827 let frag = &fenv[ix];
1828 if pt < frag.first {
1829 Ordering::Greater
1830 } else if pt >= frag.first && pt <= frag.last {
1831 Ordering::Equal
1832 } else {
1833 Ordering::Less
1834 }
1835 })
1836 .is_ok()
1837 }
1838}
1839
1840/// Vectors of RangeFrags, sorted so that they are in ascending order, per
1841/// their InstPoint fields. The RangeFrags may not overlap.
1842#[derive(Clone)]
1843pub(crate) struct SortedRangeFrags(SmallVec<[RangeFrag; 4]>);
1844
1845impl fmt::Debug for SortedRangeFrags {
1846 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1847 self.0.fmt(fmt)
1848 }
1849}
1850
1851impl Deref for SortedRangeFrags {
1852 type Target = SmallVec<[RangeFrag; 4]>;
1853 #[inline(always)]
1854 fn deref(&self) -> &Self::Target {
1855 &self.0
1856 }
1857}
1858
1859impl DerefMut for SortedRangeFrags {
1860 #[inline(always)]
1861 fn deref_mut(&mut self) -> &mut Self::Target {
1862 &mut self.0
1863 }
1864}
1865
impl SortedRangeFrags {
    pub(crate) fn unit(frag: RangeFrag) -> Self {
        let mut res = Self(SmallVec::<[RangeFrag; 4]>::new());
        res.0.push(frag);
        res
    }

    pub(crate) fn empty() -> Self {
        Self(SmallVec::<[RangeFrag; 4]>::new())
    }

    pub(crate) fn overlaps(&self, other: &Self) -> bool {
        // Since both vectors are sorted and individually non-overlapping, we can establish that
        // they are mutually non-overlapping by walking them simultaneously and checking, at each
        // step, that there is a unique "next lowest" frag available.
        let frags1 = &self.0;
        let frags2 = &other.0;
        let n1 = frags1.len();
        let n2 = frags2.len();
        let mut c1 = 0;
        let mut c2 = 0;
        loop {
            if c1 >= n1 || c2 >= n2 {
                // We made it to the end of one (or both) vectors without finding any conflicts: no
                // overlap.
                return false;
            }
            let f1 = &frags1[c1];
            let f2 = &frags2[c2];
            match cmp_range_frags(f1, f2) {
                Some(Ordering::Less) => c1 += 1,
                Some(Ordering::Greater) => c2 += 1,
                _ => {
                    // There's no unique "next frag" -- either they are identical, or they overlap.
                    // So we're done.
                    return true;
                }
            }
        }
    }

    /// Does this sorted list of range fragments contain the given instruction point?
    pub(crate) fn contains_pt(&self, pt: InstPoint) -> bool {
        self.0
            .binary_search_by(|frag| {
                if pt < frag.first {
                    Ordering::Greater
                } else if pt >= frag.first && pt <= frag.last {
                    Ordering::Equal
                } else {
                    Ordering::Less
                }
            })
            .is_ok()
    }
}
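
// A small illustrative sketch (not part of the allocator itself) of the merge-style walk
// used by `overlaps` above, expressed over plain (first, last) integer intervals.  Each
// step either identifies a unique "next lowest" interval and advances past it, or finds
// two intervals that are unordered with respect to each other, which means they overlap.
#[cfg(test)]
fn sorted_interval_lists_overlap(xs: &[(u32, u32)], ys: &[(u32, u32)]) -> bool {
    let (mut i, mut j) = (0, 0);
    while i < xs.len() && j < ys.len() {
        let (a_first, a_last) = xs[i];
        let (b_first, b_last) = ys[j];
        if a_last < b_first {
            i += 1; // xs[i] lies entirely before ys[j]
        } else if b_last < a_first {
            j += 1; // ys[j] lies entirely before xs[i]
        } else {
            return true; // neither precedes the other: they overlap
        }
    }
    false
}

#[test]
fn test_sorted_interval_lists_overlap() {
    let a = [(1, 3), (8, 9)];
    let b = [(4, 7), (10, 12)];
    let c = [(9, 10)];
    assert!(!sorted_interval_lists_overlap(&a, &b));
    assert!(sorted_interval_lists_overlap(&a, &c));
    assert!(sorted_interval_lists_overlap(&b, &c));
}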

//=============================================================================
// Representing spill costs.  A spill cost can either be infinite, in which
// case the associated VirtualRange may not be spilled, because it's already a
// spill/reload range; or it can be finite, in which case it must be a 32-bit
// floating point number which is (in the IEEE754 meaning of the terms)
// non-infinite, non-NaN and non-negative.  In fact it's meaningless for a VLR
// to have a zero spill cost (how could that really be the case?), but we
// allow it here for convenience.

#[derive(Copy, Clone)]
pub enum SpillCost {
    Infinite,    // Infinite, positive
    Finite(f32), // Finite, non-negative
}

impl fmt::Debug for SpillCost {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SpillCost::Infinite => write!(fmt, "INFINITY"),
            SpillCost::Finite(c) => write!(fmt, "{:<.3}", c),
        }
    }
}

impl SpillCost {
    #[inline(always)]
    pub fn zero() -> Self {
        SpillCost::Finite(0.0)
    }
    #[inline(always)]
    pub fn infinite() -> Self {
        SpillCost::Infinite
    }
    #[inline(always)]
    pub fn finite(cost: f32) -> Self {
        // "`is_normal` returns true if the number is neither zero, infinite,
        // subnormal, or NaN."
        assert!(cost.is_normal() || cost == 0.0);
        // And also it can't be negative.
        assert!(cost >= 0.0);
        // Somewhat arbitrarily ..
        assert!(cost < 1e18);
        SpillCost::Finite(cost)
    }
    #[inline(always)]
    pub fn is_zero(&self) -> bool {
        match self {
            SpillCost::Infinite => false,
            SpillCost::Finite(c) => *c == 0.0,
        }
    }
    #[inline(always)]
    pub fn is_infinite(&self) -> bool {
        match self {
            SpillCost::Infinite => true,
            SpillCost::Finite(_) => false,
        }
    }
    #[inline(always)]
    pub fn is_finite(&self) -> bool {
        !self.is_infinite()
    }
    #[inline(always)]
    pub fn is_less_than(&self, other: &Self) -> bool {
        match (self, other) {
            // Dubious .. both are infinity
            (SpillCost::Infinite, SpillCost::Infinite) => false,
            // finite < inf
            (SpillCost::Finite(_), SpillCost::Infinite) => true,
            // inf is not < finite
            (SpillCost::Infinite, SpillCost::Finite(_)) => false,
            // straightforward
            (SpillCost::Finite(c1), SpillCost::Finite(c2)) => c1 < c2,
        }
    }
    #[inline(always)]
    pub fn add(&mut self, other: &Self) {
        match (*self, other) {
            (SpillCost::Finite(c1), SpillCost::Finite(c2)) => {
                // The 10^18 limit above gives us a lot of headroom here, since the
                // maximum f32 is around 3.4 * 10^38.
                *self = SpillCost::Finite(c1 + c2);
            }
            (_, _) => {
                // All other cases produce an infinity.
                *self = SpillCost::Infinite;
            }
        }
    }
}
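
// A small illustrative check (not part of the original test suite) of the SpillCost
// arithmetic above: adding two finite costs stays finite, while adding infinity to
// anything saturates to infinity, which is what keeps spill/reload ranges unspillable.
#[test]
fn test_spill_cost_add_saturates() {
    let mut c = SpillCost::finite(3.0);
    c.add(&SpillCost::finite(2.0));
    assert!(c.is_finite() && !c.is_zero());
    assert!(SpillCost::zero().is_less_than(&c));
    c.add(&SpillCost::infinite());
    assert!(c.is_infinite());
    assert!(!c.is_less_than(&SpillCost::infinite()));
}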

//=============================================================================
// Representing and printing live ranges.  These are represented by two
// different but closely related types, RealRange and VirtualRange.

// RealRanges are live ranges for real regs (RealRegs).  VirtualRanges are
// live ranges for virtual regs (VirtualRegs).  VirtualRanges are the
// fundamental unit of allocation.
//
// A RealRange pairs a RealReg with a vector of RangeFragIxs in which it is
// live.  The RangeFragIxs are indices into some vector of RangeFrags (a
// "fragment environment", 'fenv'), which is not specified here.  They are
// sorted so as to give ascending order to the RangeFrags which they refer to.
//
// A VirtualRange pairs a VirtualReg with a vector of RangeFrags in which it
// is live.  Same scheme as for a RealRange, except it avoids the overhead of
// having to indirect into the fragment environment.
//
// VirtualRanges also contain metrics:
//
// * `size` is the number of instructions in total spanned by the LR.  It must
//   not be zero.
//
// * `total_cost` is an abstractified measure of the cost of the LR.  Each
//   basic block in which the range exists gives a contribution to the
//   `total_cost`: the number of times the register is mentioned in that
//   block, multiplied by the estimated execution frequency of the block.
//
// * `spill_cost` is an abstractified measure of the cost of spilling the LR,
//   and is the `total_cost` divided by the `size` (see the worked example
//   below).  The only constraint (w.r.t. correctness) is that normal LRs have
//   a finite spill cost, whilst an infinite cost is reserved for live ranges
//   created for spills and reloads.  This is needed to guarantee that
//   allocation can always succeed in the worst case, in which all of the
//   original live ranges of the program are spilled.
//
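// For example (illustrative numbers only): a VirtualRange spanning 8 instructions in
// total, mentioned 3 times in a block with estimated execution frequency 10 and twice
// in a block with frequency 1, has `total_cost` = 3*10 + 2*1 = 32 and therefore
// `spill_cost` = 32/8 = 4.0.
//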
// RealRanges don't carry any metrics info since we are not trying to allocate
// them.  We merely need to work around them.
//
// I find it helpful to think of a live range, both RealRange and
// VirtualRange, as a "renaming equivalence class".  That is, if you rename
// `reg` at some point inside `sorted_frags`, then you must rename *all*
// occurrences of `reg` inside `sorted_frags`, since otherwise the program will
// no longer work.

#[derive(Clone)]
pub struct RealRange {
    pub rreg: RealReg,
    pub(crate) sorted_frags: SortedRangeFragIxs,
    pub is_ref: bool,
}

impl fmt::Debug for RealRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(RR: {:?}{}, {:?})",
            self.rreg,
            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
}

impl RealRange {
    pub fn show_with_rru(&self, univ: &RealRegUniverse) -> String {
        format!(
            "(RR: {}{}, {:?})",
            self.rreg.to_reg().show_with_rru(univ),
            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
}

// VirtualRanges are live ranges for virtual regs (VirtualRegs).  Unlike a
// RealRange, a VirtualRange carries metrics info and also the identity of the
// RealReg to which it eventually got allocated.  (Or, in the backtracking
// allocator, the identity of the RealReg to which it is *currently* assigned;
// that may be undone at some later point.)

#[derive(Clone)]
pub struct VirtualRange {
    pub(crate) vreg: VirtualReg,
    pub(crate) rreg: Option<RealReg>,
    pub(crate) sorted_frags: SortedRangeFrags,
    pub(crate) is_ref: bool,
    pub(crate) size: u16,
    pub(crate) total_cost: u32,
    pub(crate) spill_cost: SpillCost, // == total_cost / size
}

impl VirtualRange {
    pub fn overlaps(&self, other: &Self) -> bool {
        self.sorted_frags.overlaps(&other.sorted_frags)
    }
}

impl fmt::Debug for VirtualRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(VR: {:?}{},",
            self.vreg,
            if self.is_ref { " REF" } else { "" }
        )?;
        if self.rreg.is_some() {
            write!(fmt, " -> {:?}", self.rreg.unwrap())?;
        }
        write!(
            fmt,
            " sz={}, tc={}, sc={:?}, {:?})",
            self.size, self.total_cost, self.spill_cost, self.sorted_frags
        )
    }
}

//=============================================================================
// Some auxiliary/miscellaneous data structures that are useful: RegToRangesMaps

/// Mappings from RealRegs and VirtualRegs to the sets of RealRanges and VirtualRanges that belong
/// to them.  These are needed for BT's coalescing analysis and for the dataflow analysis that
/// supports reftype handling.
pub(crate) struct RegToRangesMaps {
    /// This maps RealReg indices to the set of RealRangeIxs for that RealReg.  Valid indices are
    /// real register indices for all non-sanitised real regs; that is,
    /// 0..RealRegUniverse::allocable.  The Vecs of RealRangeIxs are duplicate-free.
    ///
    /// The SmallVec capacity of 6 was chosen after quite some profiling of Cranelift/x64/newBE
    /// compiling ZenGarden.wasm -- a huge input, with many relatively small functions.  Profiling
    /// was performed in August 2020, using Valgrind/DHAT.
    pub(crate) rreg_to_rlrs_map: Vec</*real reg ix, */ SmallVec<[RealRangeIx; 6]>>,

    /// This maps VirtualReg indices to the set of VirtualRangeIxs for that VirtualReg.  Valid
    /// indices are 0..Function::get_num_vregs().  For functions mostly translated from SSA,
    /// most VirtualRegs will have just one VirtualRange, and there are a lot of VirtualRegs in
    /// general.  So SmallVec is a definite benefit here.
    pub(crate) vreg_to_vlrs_map: Vec</*virtual reg ix, */ SmallVec<[VirtualRangeIx; 3]>>,

    /// As an optimisation heuristic for BT's coalescing analysis, these indicate which
    /// real/virtual registers have "many" `RangeFrag`s in their live ranges, where "many" is
    /// defined by the `many_frags_thresh` field.  This is not important for the overall
    /// allocation result or correctness: it merely allows the coalescing analysis to switch
    /// between two search strategies, one of which is fast for regs with few `RangeFrag`s (the
    /// vast majority) and the other of which has better asymptotic behaviour for regs with many
    /// `RangeFrag`s (in order to keep out of trouble on some pathological inputs).  These vectors
    /// are duplicate-free but the elements may be in an arbitrary order.
    pub(crate) rregs_with_many_frags: Vec<u32 /*RealReg index*/>,
    /// Same as above, for virtual registers.
    pub(crate) vregs_with_many_frags: Vec<u32 /*VirtualReg index*/>,

    /// This indicates what the threshold is actually set to.  A register is listed in
    /// `{r,v}regs_with_many_frags` if it has `many_frags_thresh` or more RangeFrags.
    pub(crate) many_frags_thresh: usize,
}
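
// A hedged sketch (illustrative only; not the allocator's actual construction code, and
// `build_reverse_map` is a hypothetical name) of how a reverse map shaped like
// `vreg_to_vlrs_map` can be built in a single pass, given, for each virtual-range index,
// the index of the virtual register that owns it.  Plain `u32` indices stand in for
// `VirtualRangeIx`/`VirtualReg`.
#[cfg(test)]
fn build_reverse_map(num_vregs: usize, vlr_owners: &[u32]) -> Vec<SmallVec<[u32; 3]>> {
    let mut map: Vec<SmallVec<[u32; 3]>> = vec![SmallVec::new(); num_vregs];
    for (vlr_ix, &vreg_ix) in vlr_owners.iter().enumerate() {
        // Each virtual range is visited exactly once, so the per-reg vectors stay duplicate-free.
        map[vreg_ix as usize].push(vlr_ix as u32);
    }
    map
}

#[test]
fn test_build_reverse_map() {
    // Virtual ranges 0, 1, 2 belong to vregs 1, 0, 1 respectively.
    let map = build_reverse_map(2, &[1, 0, 1]);
    assert_eq!(&map[0][..], &[1u32][..]);
    assert_eq!(&map[1][..], &[0u32, 2][..]);
}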

/// `MoveInfoElem` holds info about the two registers connected by a move: the source and
/// destination of the move, the instruction performing the move, and the estimated execution
/// frequency of the containing block.
pub(crate) struct MoveInfoElem {
    pub(crate) dst: Reg,
    pub(crate) src: Reg,
    pub(crate) iix: InstIx,
    pub(crate) est_freq: u32,
}

/// Vector of `MoveInfoElem`, presented in no particular order, but duplicate-free in that each
/// move instruction will be listed only once.
pub(crate) struct MoveInfo(Vec<MoveInfoElem>);

impl MoveInfo {
    pub(crate) fn new(move_info: Vec<MoveInfoElem>) -> Self {
        Self(move_info)
    }
}

impl Deref for MoveInfo {
    type Target = Vec<MoveInfoElem>;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

// Something that can be either a VirtualRangeIx or a RealRangeIx, whilst still being 32 bits
// (by stealing one bit from those spaces).  Note that the resulting thing no longer denotes a
// contiguous index space, and so it has a name that indicates it is an identifier rather than
// an index.

#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub struct RangeId {
    // 1 X--(31)--X is a RealRangeIx with value X--(31)--X
    // 0 X--(31)--X is a VirtualRangeIx with value X--(31)--X
    bits: u32,
}

impl RangeId {
    #[inline(always)]
    pub fn new_real(rlrix: RealRangeIx) -> Self {
        let n = rlrix.get();
        assert!(n <= 0x7FFF_FFFF);
        Self {
            bits: n | 0x8000_0000,
        }
    }
    #[inline(always)]
    pub fn new_virtual(vlrix: VirtualRangeIx) -> Self {
        let n = vlrix.get();
        assert!(n <= 0x7FFF_FFFF);
        Self { bits: n }
    }
    #[inline(always)]
    pub fn is_real(self) -> bool {
        self.bits & 0x8000_0000 != 0
    }
    #[allow(dead_code)]
    #[inline(always)]
    pub fn is_virtual(self) -> bool {
        self.bits & 0x8000_0000 == 0
    }
    #[inline(always)]
    pub fn to_real(self) -> RealRangeIx {
        assert!(self.bits & 0x8000_0000 != 0);
        RealRangeIx::new(self.bits & 0x7FFF_FFFF)
    }
    #[inline(always)]
    pub fn to_virtual(self) -> VirtualRangeIx {
        assert!(self.bits & 0x8000_0000 == 0);
        VirtualRangeIx::new(self.bits)
    }
    #[inline(always)]
    pub fn invalid_value() -> Self {
        // Real, and implausibly huge
        Self { bits: 0xFFFF_FFFF }
    }
}
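
// A small illustrative check (not part of the original test suite) that the tag-bit encoding
// above round-trips: the top bit distinguishes real from virtual, and the low 31 bits carry
// the original index through unchanged.
#[test]
fn test_range_id_roundtrip() {
    let v = RangeId::new_virtual(VirtualRangeIx::new(42));
    assert!(v.is_virtual() && !v.is_real());
    assert!(v.to_virtual().get() == 42);

    let r = RangeId::new_real(RealRangeIx::new(7));
    assert!(r.is_real() && !r.is_virtual());
    assert!(r.to_real().get() == 7);

    // The invalid sentinel is encoded as a (hugely out-of-range) real index.
    assert!(RangeId::invalid_value().is_real());
}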

impl fmt::Debug for RangeId {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        if self.is_real() {
            self.to_real().fmt(fmt)
        } else {
            self.to_virtual().fmt(fmt)
        }
    }
}

//=============================================================================
// Test cases

// sewardj 2020Mar04: these are commented out for now, as they no longer
// compile.  They may be useful later though, once BT acquires an interval
// tree implementation for its CommitmentMap.

/*
#[test]
fn test_sorted_frag_ranges() {
  // Create a RangeFrag and RangeFragIx from two InstPoints.
  fn gen_fix(
    fenv: &mut TypedIxVec<RangeFragIx, RangeFrag>, first: InstPoint,
    last: InstPoint,
  ) -> RangeFragIx {
    assert!(first <= last);
    let res = RangeFragIx::new(fenv.len() as u32);
    let frag = RangeFrag {
      bix: BlockIx::new(123),
      kind: RangeFragKind::Local,
      first,
      last,
      count: 0,
    };
    fenv.push(frag);
    res
  }

  fn get_range_frag(
    fenv: &TypedIxVec<RangeFragIx, RangeFrag>, fix: RangeFragIx,
  ) -> &RangeFrag {
    &fenv[fix]
  }

  // Structural equality, at least.  Not equality in the sense of
  // dereferencing the contained RangeFragIxes.
  fn sorted_range_eq(
    fixs1: &SortedRangeFragIxs, fixs2: &SortedRangeFragIxs,
  ) -> bool {
    if fixs1.frag_ixs.len() != fixs2.frag_ixs.len() {
      return false;
    }
    for (mf1, mf2) in fixs1.frag_ixs.iter().zip(&fixs2.frag_ixs) {
      if mf1 != mf2 {
        return false;
      }
    }
    true
  }

  let iix3 = InstIx::new(3);
  let iix4 = InstIx::new(4);
  let iix5 = InstIx::new(5);
  let iix6 = InstIx::new(6);
  let iix7 = InstIx::new(7);
  let iix10 = InstIx::new(10);
  let iix12 = InstIx::new(12);

  let fp_3u = InstPoint::new_use(iix3);
  let fp_3d = InstPoint::new_def(iix3);

  let fp_4u = InstPoint::new_use(iix4);

  let fp_5u = InstPoint::new_use(iix5);
  let fp_5d = InstPoint::new_def(iix5);

  let fp_6u = InstPoint::new_use(iix6);
  let fp_6d = InstPoint::new_def(iix6);

  let fp_7u = InstPoint::new_use(iix7);
  let fp_7d = InstPoint::new_def(iix7);

  let fp_10u = InstPoint::new_use(iix10);
  let fp_12u = InstPoint::new_use(iix12);

  let mut fenv = TypedIxVec::<RangeFragIx, RangeFrag>::new();

  let fix_3u = gen_fix(&mut fenv, fp_3u, fp_3u);
  let fix_3d = gen_fix(&mut fenv, fp_3d, fp_3d);
  let fix_4u = gen_fix(&mut fenv, fp_4u, fp_4u);
  let fix_3u_5u = gen_fix(&mut fenv, fp_3u, fp_5u);
  let fix_3d_5d = gen_fix(&mut fenv, fp_3d, fp_5d);
  let fix_3d_5u = gen_fix(&mut fenv, fp_3d, fp_5u);
  let fix_3u_5d = gen_fix(&mut fenv, fp_3u, fp_5d);
  let fix_6u_6d = gen_fix(&mut fenv, fp_6u, fp_6d);
  let fix_7u_7d = gen_fix(&mut fenv, fp_7u, fp_7d);
  let fix_10u = gen_fix(&mut fenv, fp_10u, fp_10u);
  let fix_12u = gen_fix(&mut fenv, fp_12u, fp_12u);

  // Boundary checks for point ranges, 3u vs 3d
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u),
      get_range_frag(&fenv, fix_3u)
    ) == Some(Ordering::Equal)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Less)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_3u)
    ) == Some(Ordering::Greater)
  );

  // Boundary checks for point ranges, 3d vs 4u
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Equal)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d),
      get_range_frag(&fenv, fix_4u)
    ) == Some(Ordering::Less)
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_4u),
      get_range_frag(&fenv, fix_3d)
    ) == Some(Ordering::Greater)
  );

  // Partially overlapping
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d_5d),
      get_range_frag(&fenv, fix_3u_5u)
    ) == None
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u_5u),
      get_range_frag(&fenv, fix_3d_5d)
    ) == None
  );

  // Completely overlapping: one contained within the other
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3d_5u),
      get_range_frag(&fenv, fix_3u_5d)
    ) == None
  );
  assert!(
    cmp_range_frags(
      get_range_frag(&fenv, fix_3u_5d),
      get_range_frag(&fenv, fix_3d_5u)
    ) == None
  );

  // Create a SortedRangeFragIxs from a bunch of RangeFrag indices
  fn new_sorted_frag_ranges(
    fenv: &TypedIxVec<RangeFragIx, RangeFrag>, frags: &Vec<RangeFragIx>,
  ) -> SortedRangeFragIxs {
    SortedRangeFragIxs::new(&frags, fenv)
  }

  // Construction tests
  // These fail due to overlap
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_3u, fix_3u_3u]);
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_5u, fix_3d_5d]);

  // These fail due to not being in order
  //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_4u_4u, fix_3u_3u]);

  // Simple non-overlap tests for add()

  let smf_empty = new_sorted_frag_ranges(&fenv, &vec![]);
  let smf_6_7_10 =
    new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d, fix_10u]);
  let smf_3_12 = new_sorted_frag_ranges(&fenv, &vec![fix_3u, fix_12u]);
  let smf_3_6_7_10_12 = new_sorted_frag_ranges(
    &fenv,
    &vec![fix_3u, fix_6u_6d, fix_7u_7d, fix_10u, fix_12u],
  );
  let mut tmp;

  tmp = smf_empty.clone();
  tmp.add(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_3_12.clone();
  tmp.add(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_empty.clone();
  tmp.add(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_6_7_10.clone();
  tmp.add(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));

  tmp = smf_3_12.clone();
  tmp.add(&smf_6_7_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));

  // Tests for can_add()
  assert!(true == smf_empty.can_add(&smf_empty, &fenv));
  assert!(true == smf_empty.can_add(&smf_3_12, &fenv));
  assert!(true == smf_3_12.can_add(&smf_empty, &fenv));
  assert!(false == smf_3_12.can_add(&smf_3_12, &fenv));

  assert!(true == smf_6_7_10.can_add(&smf_3_12, &fenv));

  assert!(true == smf_3_12.can_add(&smf_6_7_10, &fenv));

  // Tests for del()
  let smf_6_7 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d]);
  let smf_6_10 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_10u]);
  let smf_7 = new_sorted_frag_ranges(&fenv, &vec![fix_7u_7d]);
  let smf_10 = new_sorted_frag_ranges(&fenv, &vec![fix_10u]);

  tmp = smf_empty.clone();
  tmp.del(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_3_12.clone();
  tmp.del(&smf_empty, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_empty.clone();
  tmp.del(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_empty));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_3_12, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_7_10));

  tmp = smf_3_12.clone();
  tmp.del(&smf_6_7_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_3_12));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_6_7, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_10));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_10, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_7));

  tmp = smf_6_7_10.clone();
  tmp.del(&smf_7, &fenv);
  assert!(sorted_range_eq(&tmp, &smf_6_10));

  // Tests for can_add_if_we_first_del()
  let smf_10_12 = new_sorted_frag_ranges(&fenv, &vec![fix_10u, fix_12u]);

  assert!(
    true
      == smf_6_7_10
        .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_3_12, &fenv)
  );

  assert!(
    false
      == smf_6_7_10
        .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_7, &fenv)
  );
}
*/