// stack_graphs/cycles.rs

1// -*- coding: utf-8 -*-
2// ------------------------------------------------------------------------------------------------
3// Copyright © 2021, stack-graphs authors.
4// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
5// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
6// ------------------------------------------------------------------------------------------------
7
8//! Detect and avoid cycles in our path-finding algorithm.
9//!
10//! Cycles in a stack graph can indicate many things.  Your language might allow mutually recursive
11//! imports.  If you are modeling dataflow through function calls, then any recursion in your
12//! function calls will lead to cycles in your stack graph.  And if you have any control-flow paths
13//! that lead to infinite loops at runtime, we'll probably discover those as stack graph paths
14//! during the path-finding algorithm.
15//!
16//! (Note that we're only considering cycles in well-formed paths.  For instance, _pop symbol_
17//! nodes are "guards" that don't allow you to progress into a node if the top of the symbol stack
18//! doesn't match.  We don't consider that a valid path, and so we don't have to worry about
19//! whether it contains any cycles.)
20//!
21//! This module implements a cycle detector that lets us detect these situations and "cut off"
22//! these paths, not trying to extend them any further.  Note that any cycle detection logic we
23//! implement will be a heuristic.  In particular, since our path-finding algorithm will mimic any
24//! runtime recursion, a "complete" cycle detection logic would be equivalent to the Halting
25//! Problem.
26//!
27//! Right now, we implement a simple heuristic where we limit the number of distinct paths that we
28//! process that have the same start and end nodes.  We do not make any guarantees that we will
29//! always use this particular heuristic, however!  We reserve the right to change the heuristic at
30//! any time.
31
32use enumset::EnumSet;
33use smallvec::SmallVec;
34use std::cmp::Ordering;
35use std::collections::HashMap;
36
37use crate::arena::Arena;
38use crate::arena::Handle;
39use crate::arena::List;
40use crate::arena::ListArena;
41use crate::graph::Node;
42use crate::graph::StackGraph;
43use crate::partial::Cyclicity;
44use crate::partial::PartialPath;
45use crate::partial::PartialPaths;
46use crate::paths::PathResolutionError;
47use crate::stats::FrequencyDistribution;
48use crate::stitching::Appendable;
49use crate::stitching::ToAppendable;
50
/// Helps detect similar paths in the path-finding algorithm.
pub struct SimilarPathDetector<P> {
    /// Paths seen so far, bucketed by their [`PathKey`].  A bucket holds the distinct,
    /// mutually incomparable paths kept for that key.
    paths: HashMap<PathKey, SmallVec<[P; 4]>>,
    /// When statistics collection is enabled, the number of similar paths seen per stored
    /// path, kept index-aligned with the buckets in `paths`.  `None` when stats are disabled.
    counts: Option<HashMap<PathKey, SmallVec<[usize; 4]>>>,
}
56
#[doc(hidden)]
/// Bucketing key for similarity detection: two paths are "possibly similar" when they share
/// endpoints and the *lengths* (not contents) of their pre- and postcondition stacks.
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct PathKey {
    start_node: Handle<Node>,
    end_node: Handle<Node>,
    symbol_stack_precondition_len: usize,
    scope_stack_precondition_len: usize,
    symbol_stack_postcondition_len: usize,
    scope_stack_postcondition_len: usize,
}
67
#[doc(hidden)]
/// Implemented by path types that can be bucketed by a [`PathKey`].
pub trait HasPathKey: Clone {
    /// The arena type that owns this path's component data.
    type Arena;
    /// Returns the similarity key for this path.
    fn key(&self) -> PathKey;
}
73
74impl HasPathKey for PartialPath {
75    type Arena = PartialPaths;
76
77    fn key(&self) -> PathKey {
78        PathKey {
79            start_node: self.start_node,
80            end_node: self.end_node,
81            symbol_stack_precondition_len: self.symbol_stack_precondition.len(),
82            scope_stack_precondition_len: self.scope_stack_precondition.len(),
83            symbol_stack_postcondition_len: self.symbol_stack_postcondition.len(),
84            scope_stack_postcondition_len: self.scope_stack_postcondition.len(),
85        }
86    }
87}
88
impl<P> SimilarPathDetector<P>
where
    P: HasPathKey,
{
    /// Creates a new, empty cycle detector.
    pub fn new() -> SimilarPathDetector<P> {
        SimilarPathDetector {
            paths: HashMap::new(),
            counts: None,
        }
    }

    /// Set whether to collect statistics for this similar path detector.
    pub fn set_collect_stats(&mut self, collect_stats: bool) {
        if !collect_stats {
            self.counts = None;
        } else if self.counts.is_none() {
            // Only allocate a fresh map when stats were previously disabled, so that
            // re-enabling does not discard counts that were already collected.
            self.counts = Some(HashMap::new());
        }
    }

    /// Add a path, and determine whether we should process this path during the path-finding
    /// algorithm.  Returns true if an equal-or-better path with the same key (same start and
    /// end nodes, and same pre- and postcondition stack lengths) was already added — in that
    /// case the new path is ignored and the caller should skip it.  Returns false if this path
    /// is new, or better than every similar path seen so far; it is then stored and should be
    /// processed.
    ///
    /// `cmp` compares two paths: `Some(Less)` means the first (new) path shadows the second
    /// (stored) one, `Some(Equal)`/`Some(Greater)` means the stored one shadows the new one,
    /// and `None` means the two are not comparable.
    pub fn add_path<Cmp>(
        &mut self,
        _graph: &StackGraph,
        arena: &mut P::Arena,
        path: &P,
        cmp: Cmp,
    ) -> bool
    where
        Cmp: Fn(&mut P::Arena, &P, &P) -> Option<Ordering>,
    {
        let key = path.key();

        // Iterate through the bucket to determine if this path is better than any already known
        // path. Note that the bucket might be modified during the loop if a path is removed which
        // is shadowed by the new path!
        let possibly_similar_paths = self.paths.entry(key.clone()).or_default();
        let mut possible_similar_counts = self
            .counts
            .as_mut()
            .map(move |cs| cs.entry(key).or_default());
        let mut idx = 0;
        // Number of similar paths shadowed by the new path; carried over from removed paths so
        // their counts are not lost.
        let mut count = 0;
        while idx < possibly_similar_paths.len() {
            let other_path = &mut possibly_similar_paths[idx];
            match cmp(arena, path, other_path) {
                Some(Ordering::Less) => {
                    // the new path is better, remove the old one
                    possibly_similar_paths.remove(idx);
                    if let Some(possible_similar_counts) = possible_similar_counts.as_mut() {
                        // The counts bucket is index-aligned with the paths bucket, so `idx`
                        // refers to the same logical path in both.
                        count += possible_similar_counts[idx];
                        possible_similar_counts.remove(idx);
                    }
                    // keep `idx` which now points to the next element
                    continue;
                }
                Some(_) => {
                    // the new path is equal or worse, and ignored
                    if let Some(possible_similar_counts) = possible_similar_counts {
                        possible_similar_counts[idx] += 1;
                    }
                    return true;
                }
                None => {
                    // the paths are not comparable; check the next stored path
                    idx += 1;
                }
            }
        }

        // this path is either new or better, keep it
        possibly_similar_paths.push(path.clone());
        if let Some(possible_similar_counts) = possible_similar_counts {
            possible_similar_counts.push(count);
        }
        false
    }

    // Debug-only helper: size of the largest similarity bucket.
    #[cfg(feature = "copious-debugging")]
    pub fn max_bucket_size(&self) -> usize {
        self.paths.iter().map(|b| b.1.len()).max().unwrap_or(0)
    }

    /// Returns the distribution of similar path counts.
    pub fn stats(&self) -> SimilarPathStats {
        let mut stats = SimilarPathStats::default();
        if let Some(counts) = &self.counts {
            for bucket in counts.values() {
                stats.similar_path_bucket_size.record(bucket.len());
                for count in bucket.iter() {
                    stats.similar_path_count.record(*count);
                }
            }
        }
        stats
    }
}
188
/// Statistics collected by a [`SimilarPathDetector`] when stats collection is enabled.
#[derive(Clone, Debug, Default)]
pub struct SimilarPathStats {
    /// The distribution of the number of similar paths detected
    pub similar_path_count: FrequencyDistribution<usize>,
    /// The distribution of the internal bucket sizes in the similar path detector
    pub similar_path_bucket_size: FrequencyDistribution<usize>,
}
196
197impl std::ops::AddAssign<Self> for SimilarPathStats {
198    fn add_assign(&mut self, rhs: Self) {
199        self.similar_path_bucket_size += rhs.similar_path_bucket_size;
200        self.similar_path_count += rhs.similar_path_count;
201    }
202}
203
204impl std::ops::AddAssign<&Self> for SimilarPathStats {
205    fn add_assign(&mut self, rhs: &Self) {
206        self.similar_path_bucket_size += &rhs.similar_path_bucket_size;
207        self.similar_path_count += &rhs.similar_path_count;
208    }
209}
210
211// ----------------------------------------------------------------------------
212// Cycle detector
213
214/// An arena used by [`AppendingCycleDetector`][] to store the path component lists.
215/// The arena is shared between all cycle detectors in a path stitching run, so that
216/// the cycle detectors themselves can be small and cheaply cloned.
217pub struct Appendables<H> {
218    /// List arena for appendable lists
219    elements: ListArena<InternedOrHandle<H>>,
220    /// Arena for interned partial paths
221    interned: Arena<PartialPath>,
222}
223
224impl<H> Appendables<H> {
225    pub fn new() -> Self {
226        Self {
227            elements: ListArena::new(),
228            interned: Arena::new(),
229        }
230    }
231}
232
/// Enum that unifies handles to initial paths interned in the cycle detector, and appended
/// handles to appendables in the external database.
#[derive(Clone)]
enum InternedOrHandle<H> {
    /// A partial path interned in the shared [`Appendables`] arena.
    Interned(Handle<PartialPath>),
    /// A handle into an external database of appendables.
    Database(H),
}
240
241impl<H> InternedOrHandle<H>
242where
243    H: Clone,
244{
245    fn append_to<'a, A, Db>(
246        &self,
247        graph: &StackGraph,
248        partials: &mut PartialPaths,
249        db: &'a Db,
250        interned: &Arena<PartialPath>,
251        path: &mut PartialPath,
252    ) -> Result<(), PathResolutionError>
253    where
254        A: Appendable + 'a,
255        Db: ToAppendable<H, A>,
256    {
257        match self {
258            Self::Interned(h) => interned.get(*h).append_to(graph, partials, path),
259            Self::Database(h) => db.get_appendable(h).append_to(graph, partials, path),
260        }
261    }
262
263    fn start_node<'a, A, Db>(&self, db: &'a Db, interned: &Arena<PartialPath>) -> Handle<Node>
264    where
265        A: Appendable + 'a,
266        Db: ToAppendable<H, A>,
267    {
268        match self {
269            Self::Interned(h) => interned.get(*h).start_node,
270            Self::Database(h) => db.get_appendable(h).start_node(),
271        }
272    }
273
274    fn end_node<'a, A, Db>(&self, db: &'a Db, interned: &Arena<PartialPath>) -> Handle<Node>
275    where
276        A: Appendable + 'a,
277        Db: ToAppendable<H, A>,
278    {
279        match self {
280            Self::Interned(h) => interned.get(*h).end_node,
281            Self::Database(h) => db.get_appendable(h).end_node(),
282        }
283    }
284}
285
/// A cycle detector that builds up paths by appending elements to it.
/// Path elements are stored in a shared arena that must be provided
/// when calling methods, so that cloning the cycle detector itself is
/// cheap.
#[derive(Clone)]
pub struct AppendingCycleDetector<H> {
    /// The path's components, most recently appended first (elements are pushed to the
    /// front).  List cells live in the shared [`Appendables`] arena.
    appendages: List<InternedOrHandle<H>>,
}
294
295impl<H> AppendingCycleDetector<H> {
296    pub fn new() -> Self {
297        Self {
298            appendages: List::empty(),
299        }
300    }
301
302    pub fn from(appendables: &mut Appendables<H>, path: PartialPath) -> Self {
303        let h = appendables.interned.add(path);
304        let mut result = Self::new();
305        result
306            .appendages
307            .push_front(&mut appendables.elements, InternedOrHandle::Interned(h));
308        result
309    }
310
311    pub fn append(&mut self, appendables: &mut Appendables<H>, appendage: H) {
312        self.appendages.push_front(
313            &mut appendables.elements,
314            InternedOrHandle::Database(appendage),
315        );
316    }
317}
318
319impl<H> AppendingCycleDetector<H>
320where
321    H: Clone,
322{
323    /// Tests if the path is cyclic. Returns a vector indicating the kind of cycles that were found.
324    /// If appending or concatenating all fragments succeeds, this function will never raise and error.
325    pub fn is_cyclic<'a, A, Db>(
326        &self,
327        graph: &StackGraph,
328        partials: &mut PartialPaths,
329        db: &'a Db,
330        appendables: &mut Appendables<H>,
331    ) -> Result<EnumSet<Cyclicity>, PathResolutionError>
332    where
333        A: Appendable + 'a,
334        Db: ToAppendable<H, A>,
335    {
336        let mut cycles = EnumSet::new();
337
338        let end_node = match self.appendages.clone().pop_front(&mut appendables.elements) {
339            Some(appendage) => appendage.end_node(db, &appendables.interned),
340            None => return Ok(cycles),
341        };
342
343        let mut maybe_cyclic_path = None;
344        let mut remaining_appendages = self.appendages;
345        // Unlike the stored appendages, which are stored in a shared arena, we use a _local_
346        // buffer to collect the prefix appendages that we collect for possible cycles. This is
347        // to prevent adding elements to the shared arena for every invocation of this method,
348        // because they would remain in the arena after the method returns. We take care to
349        // minimize (re)allocations by (a) only allocating when a possible cycle is detected,
350        // (b) reserving all necessary space before adding elements, and (c) reusing the buffer
351        // between loop iterations.
352        let mut prefix_appendages = Vec::new();
353        loop {
354            // find cycle length
355            let mut counting_appendages = remaining_appendages;
356            let mut cycle_length = 0usize;
357            loop {
358                let appendable = counting_appendages.pop_front(&mut appendables.elements);
359                match appendable {
360                    Some(appendage) => {
361                        cycle_length += 1;
362                        let is_cycle = appendage.start_node(db, &appendables.interned) == end_node;
363                        if is_cycle {
364                            break;
365                        }
366                    }
367                    None => return Ok(cycles),
368                }
369            }
370
371            // collect prefix elements (reversing their order)
372            prefix_appendages.clear();
373            prefix_appendages.reserve(cycle_length);
374            for _ in 0..cycle_length {
375                let appendable = remaining_appendages
376                    .pop_front(&mut appendables.elements)
377                    .expect("")
378                    .clone();
379                prefix_appendages.push(appendable);
380            }
381
382            // build prefix path -- prefix starts at end_node, because this is a cycle
383            let mut prefix_path = PartialPath::from_node(graph, partials, end_node);
384            while let Some(appendage) = prefix_appendages.pop() {
385                appendage.append_to(
386                    graph,
387                    partials,
388                    db,
389                    &appendables.interned,
390                    &mut prefix_path,
391                )?;
392            }
393
394            // build cyclic path
395            let cyclic_path = maybe_cyclic_path
396                .unwrap_or_else(|| PartialPath::from_node(graph, partials, end_node));
397            cyclic_path.append_to(graph, partials, &mut prefix_path)?;
398            if prefix_path.edges.len() > 0 {
399                if let Some(cyclicity) = prefix_path.is_cyclic(graph, partials) {
400                    cycles |= cyclicity;
401                }
402            }
403            maybe_cyclic_path = Some(prefix_path);
404        }
405    }
406}