1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
//! This module contains stuff related to choosing better loops in Relooper. The problem is simple:
//! there is a point in the relooper algorithm where we commit to making a `Loop` structure. At that
//! point, we partition blocks into those that go into the loop and those that go after the loop.
//!
//! Relooper gives us a handful of blocks which _must_ go in the loop, but we have some freedom to
//! choose which other blocks we want to also push into the loop.
//!
//! Our choice can be informed by one of two strategies:
//!
//!   * _Do what C does_. We can try to match what C does by keeping track of which blocks in CFG
//!     formed loops in the initial C source. This information needs to be built up when initially
//!     building the CFG .
//!
//!   * _Try to avoid jump-table's_. The more entries we push into the loop, the more we risk
//!     creating a massive loop that starts with a jump table. OTOH, the more entries we put after
//!     the loop, the more likely we are to have a jump table right after the loop.
//!

#![deny(missing_docs)]

use super::*;
use indexmap::{IndexMap, IndexSet};

/// Modifies the `body_blocks`, `follow_blocks`, and `follow_entries` to try to get all of the
/// `desired_body` labels into the body. If it is not possible to do this, returns `false` (and the
/// mutable references passed in cannot be used).
///
/// Also return `false` if the loop body ends up having follow blocks pointing into it.
pub fn match_loop_body(
    mut desired_body: IndexSet<Label>,
    strict_reachable_from: &IndexMap<Label, IndexSet<Label>>,
    body_blocks: &mut IndexMap<Label, BasicBlock<StructureLabel<StmtOrDecl>, StmtOrDecl>>,
    follow_blocks: &mut IndexMap<Label, BasicBlock<StructureLabel<StmtOrDecl>, StmtOrDecl>>,
    follow_entries: &mut IndexSet<Label>,
) -> bool {
    // Keep moving `follow_entries` that are also in `desired_body` into the loop's `body_blocks`
    let mut something_happened = true;
    while something_happened {
        something_happened = false;

        let to_move: Vec<Label> = follow_entries
            .intersection(&desired_body)
            .cloned()
            .collect();

        for following in to_move {
            let bb = if let Some(bb) = follow_blocks.remove(&following) {
                bb
            } else {
                continue;
            };
            something_happened = true;

            desired_body.remove(&following);

            follow_entries.remove(&following);
            follow_entries.extend(&bb.successors());
            follow_entries.retain(|e| !body_blocks.contains_key(e));
            body_blocks.insert(following, bb);
        }
    }

    desired_body.is_empty()
        && body_blocks.keys().all(|body_lbl| {
            // check that no body block can be reached from a block _not_ in the loop
            match strict_reachable_from.get(body_lbl) {
                None => true,
                Some(reachable_froms) => reachable_froms
                    .iter()
                    .all(|lbl| body_blocks.contains_key(lbl)),
            }
        })
}

/// Use heuristics to decide which blocks to move into the loop body.
///
///  1. Don't do anything if `follow_entries` is zero or one (since that means whatever
///     follows the loop will be nice looking).
///  2. Otherwise, recursively push into the loop `follow_entries` as long as they have no
///     more than 1 successor (the hope is that some of the chains will join).
///
/// This always succeeds.
pub fn heuristic_loop_body(
    predecessor_map: &IndexMap<Label, IndexSet<Label>>,
    body_blocks: &mut IndexMap<Label, BasicBlock<StructureLabel<StmtOrDecl>, StmtOrDecl>>,
    follow_blocks: &mut IndexMap<Label, BasicBlock<StructureLabel<StmtOrDecl>, StmtOrDecl>>,
    follow_entries: &mut IndexSet<Label>,
) -> () {
    if follow_entries.len() > 1 {
        for follow_entry in follow_entries.clone().iter() {
            let mut following: Label = *follow_entry;

            loop {
                // If this block might have come from 2 places, give up
                if predecessor_map[&following].len() != 1 {
                    break;
                }

                // Otherwise, move it into the loop
                let bb = if let Some(bb) = follow_blocks.remove(&following) {
                    bb
                } else {
                    break;
                };
                let succs = bb.successors();

                body_blocks.insert(following, bb);
                follow_entries.remove(&following);
                follow_entries.extend(&succs);

                // If it has more than one successor, don't try following the successor
                if succs.len() != 1 {
                    break;
                }

                following = *succs.iter().next().unwrap();
            }
        }
    }
}

/// These IDs identify groups of basic blocks corresponding to loops in a CFG.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct LoopId(u64);

impl LoopId {
    /// Create a new loop id from (presumably) fresh number.
    pub fn new(id: u64) -> LoopId {
        LoopId(id)
    }

    /// Turn the loop id into an identifier. Note that there one needs to add on a tick mark for
    /// this to be usable as a loop label.
    pub fn pretty_print(&self) -> String {
        let &LoopId(loop_id) = self;
        format!("l_{}", loop_id)
    }
}

/// Stores information about loops in a CFG.
#[derive(Clone, Debug)]
pub struct LoopInfo<Lbl: Hash + Eq> {
    /// Given a node, find the tightest enclosing loop
    node_loops: IndexMap<Lbl, LoopId>,

    /// Given a loop, find all the nodes in it, along with the next tighest loop around it.
    loops: IndexMap<LoopId, (IndexSet<Lbl>, Option<LoopId>)>,
}

impl<Lbl: Hash + Eq + Clone> LoopInfo<Lbl> {
    #[allow(missing_docs)]
    pub fn new() -> Self {
        LoopInfo {
            node_loops: IndexMap::new(),
            loops: IndexMap::new(),
        }
    }

    /// Merge the information from another `LoopInfo` into this `LoopInfo`
    pub fn absorb(&mut self, other: LoopInfo<Lbl>) {
        self.node_loops.extend(other.node_loops);
        self.loops.extend(other.loops);
    }

    /// Find the smallest possible loop that contains all of the items
    pub fn tightest_common_loop<E: Iterator<Item = Lbl>>(&self, mut entries: E) -> Option<LoopId> {
        let first = if let Some(f) = entries.next() {
            f
        } else {
            return None;
        };

        let mut loop_id = if let Some(i) = self.node_loops.get(&first) {
            *i
        } else {
            return None;
        };

        for entry in entries {
            // Widen the loop until it contains the `entry`, or it can no longer be widened.
            loop {
                let (ref in_loop, mut parent_id) = self.loops[&loop_id];
                if in_loop.contains(&entry) {
                    break;
                }
                loop_id = if let Some(i) = parent_id {
                    i
                } else {
                    return None;
                };
            }
        }

        return Some(loop_id);
    }

    /// Filter out any nodes which need to be pruned from the entire CFG due to being unreachable.
    pub fn filter_unreachable(&mut self, reachable: &IndexSet<Lbl>) -> () {
        self.node_loops.retain(|lbl, _| reachable.contains(lbl));
        for (_, &mut (ref mut set, _)) in self.loops.iter_mut() {
            set.retain(|lbl| reachable.contains(lbl));
        }
    }

    /// Rewrite nodes to take into account a node remapping. Note that the remapping is usually
    /// going to be very much _not_ injective - the whole point of remapping is to merge some nodes.
    pub fn rewrite_blocks(&mut self, rewrites: &IndexMap<Lbl, Lbl>) -> () {
        self.node_loops.retain(|lbl, _| rewrites.get(lbl).is_none());
        for (_, &mut (ref mut set, _)) in self.loops.iter_mut() {
            set.retain(|lbl| rewrites.get(lbl).is_none());
        }
    }

    /// Add in information about a new loop
    pub fn add_loop(
        &mut self,
        id: LoopId,
        contents: IndexSet<Lbl>,
        outer_id: Option<LoopId>,
    ) -> () {
        for elem in &contents {
            if !self.node_loops.contains_key(elem) {
                self.node_loops.insert(elem.clone(), id);
            }
        }

        self.loops.insert(id, (contents, outer_id));
    }

    /// Get all of the `LoopId`'s corresponding to loops of increasing size around the given label
    pub fn enclosing_loops(&self, lbl: &Lbl) -> Vec<LoopId> {
        let mut loop_id_opt: Option<LoopId> = self.node_loops.get(lbl).cloned();
        let mut loop_ids = vec![];
        while let Some(loop_id) = loop_id_opt {
            loop_ids.push(loop_id);
            loop_id_opt = self.loops[&loop_id].1;
        }
        loop_ids
    }

    /// Get all of the nodes contained in a given loop
    pub fn get_loop_contents<'a>(&'a self, id: LoopId) -> &'a IndexSet<Lbl> {
        &self
            .loops
            .get(&id)
            .expect(&format!("There is no loop with id {:?}", id))
            .0
    }
}