Skip to main content

dag/
utils.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8use std::collections::HashMap;
9use std::collections::HashSet;
10use std::collections::VecDeque;
11use std::sync::Mutex;
12
13use futures::future::BoxFuture;
14use futures::TryStreamExt;
15
16use crate::errors::programming;
17use crate::Result;
18use crate::Set;
19use crate::Vertex;
20
21/// Pre-process a parent function that might have cycles.
22/// Return a new parent function that won't have cycles.
23///
24/// This function is not fast. Only use it on small graphs.
25pub fn break_parent_func_cycle<F>(parent_func: F) -> impl Fn(Vertex) -> Result<Vec<Vertex>>
26where
27    F: Fn(Vertex) -> Result<Vec<Vertex>>,
28{
29    #[derive(Default)]
30    struct State {
31        /// Previously calculated parents.
32        known: HashMap<Vertex, Vec<Vertex>>,
33    }
34    impl State {
35        fn is_ancestor(&self, ancestor: &Vertex, descentant: &Vertex) -> bool {
36            if ancestor == descentant {
37                return true;
38            }
39            let mut to_visit = vec![descentant];
40            let mut visited = HashSet::new();
41            while let Some(v) = to_visit.pop() {
42                if !visited.insert(v) {
43                    continue;
44                }
45                if let Some(parents) = self.known.get(v) {
46                    for p in parents {
47                        if p == ancestor {
48                            return true;
49                        }
50                        to_visit.push(p);
51                    }
52                }
53            }
54            false
55        }
56    }
57    let state: Mutex<State> = Default::default();
58
59    move |v: Vertex| -> Result<Vec<Vertex>> {
60        let mut state = state.lock().unwrap();
61        if let Some(parents) = state.known.get(&v) {
62            return Ok(parents.clone());
63        }
64        let parents = parent_func(v.clone())?;
65        let mut result = Vec::with_capacity(parents.len());
66        for p in parents {
67            if !state.is_ancestor(&v, &p) {
68                // Not a cycle.
69                result.push(p);
70            }
71        }
72        state.known.insert(v, result.clone());
73        Ok(result)
74    }
75}
76
77/// Given a `set` (sub-graph) and a filter function that selects "known"
78/// subset of its input, apply filter to `set`.
79///
80/// The filter funtion must have following properties:
81/// - filter(xs) + filter(ys) = filter(xs + ys)
82/// - If its input contains both X and Y and X is an ancestor of Y in the
83///   sub-graph, and its output contains Y, then its output must also
84///   contain Y's ancestor X.
85///   In other words, if vertex X is considered known, then ancestors
86///   of X are also known.
87///
88/// This function has a similar signature with `filter`, but it utilizes
89/// the above properties to test (much) less vertexes for a large input
90/// set.
91///
92/// The idea of the algorithm comes from Mercurial's `setdiscovery.py`,
93/// introduced by [1]. `setdiscovery.py` is used to figure out what
94/// commits are needed to be pushed or pulled.
95///
96/// [1]: https://www.mercurial-scm.org/repo/hg/rev/cb98fed52495
97pub async fn filter_known<'a>(
98    set: Set,
99    filter_known_func: &(
100         dyn (Fn(&[Vertex]) -> BoxFuture<'a, Result<Vec<Vertex>>>) + Send + Sync + 'a
101     ),
102) -> Result<Set> {
103    // Figure out unassigned (missing) vertexes that do need to be inserted.
104    //
105    // remaining:  subset not categorized.
106    // known:      subset categorized as "known"
107    // unknown:    subset categorized as "unknown"
108    //
109    // See [1] for the algorithm, basically:
110    // - Take a subset (sample) of "remaining".
111    // - Check the subset (sample). Divide it into (new_known, new_unknown).
112    // - known   |= ancestors(new_known)
113    // - unknown |= descendants(new_unknown)
114    // - remaining -= known | unknown
115    // - Repeat until "remaining" becomes empty.
116    let mut remaining = set;
117    let subdag = match remaining.dag() {
118        Some(dag) => dag,
119        None => return programming("filter_known requires set to associate to a Dag"),
120    };
121    let mut known = Set::empty();
122
123    for i in 1usize.. {
124        let remaining_old_len = remaining.count_slow().await?;
125        if remaining_old_len == 0 {
126            break;
127        }
128
129        // Sample: heads, roots, and the "middle point" from "remaining".
130        let sample = if i <= 2 {
131            // But for the first few queries, let's just check the roots.
132            // This could reduce remote lookups, when we only need to
133            // query the roots to rule out all `remaining` vertexes.
134            subdag.roots(remaining.clone()).await?
135        } else {
136            subdag
137                .roots(remaining.clone())
138                .await?
139                .union(&subdag.heads(remaining.clone()).await?)
140                .union(&remaining.skip((remaining_old_len as u64) / 2).take(1))
141        };
142        let sample: Vec<Vertex> = sample.iter().await?.try_collect().await?;
143        let new_known = filter_known_func(&sample).await?;
144        let new_unknown: Vec<Vertex> = {
145            let filtered_set: HashSet<Vertex> = new_known.iter().cloned().collect();
146            sample
147                .iter()
148                .filter(|v| !filtered_set.contains(v))
149                .cloned()
150                .collect()
151        };
152
153        let new_known = Set::from_static_names(new_known);
154        let new_unknown = Set::from_static_names(new_unknown);
155
156        let new_known = subdag.ancestors(new_known).await?;
157        let new_unknown = subdag.descendants(new_unknown).await?;
158
159        remaining = remaining.difference(&new_known.union(&new_unknown));
160        let remaining_new_len = remaining.count_slow().await?;
161
162        let known_old_len = known.count_slow().await?;
163        known = known.union(&new_known);
164        let known_new_len = known.count_slow().await?;
165
166        tracing::trace!(
167            target: "dag::utils::filter_known",
168            "#{} remaining {} => {}, known: {} => {}",
169            i,
170            remaining_old_len,
171            remaining_new_len,
172            known_old_len,
173            known_new_len
174        );
175    }
176
177    Ok(known)
178}
179
180/// Produce an order of "nodes" in a graph for cleaner graph output.
181/// For example, turn:
182///
183/// ```plain,ignore
184/// 0-1---3---5---7---9---11--12
185///             / \
186///    2---4---6   8---10
187/// # [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
188/// ```
189///
190/// into:
191///
192/// ```plain,ignore
193/// 0-1-3-5-------7------9-11-12
194///              / \
195///         2-4-6   8-10
196/// # [12, 11, 9, 10, 8, 7, 6, 4, 2, 5, 3, 1, 0]
197/// ```
198///
199/// This function takes a slice of integers as graph "nodes". The callsite
200/// should maintain mapping between the integers and real graph "nodes"
201/// (vertexes, ids, etc.).
202///
203/// The algorithm works by DFSing from roots to heads following the
204/// parent->child edges. It was originally implemented in `smartlog.py`.
205///
206/// By default, longer branches get output first (for a fork), or last (for a
207/// merge). This makes a typical vertical graph rendering algorithm more
208/// likely put longer branches in the first column, result in less indentation
209/// overall. This can be overridden by `priorities`. `priorities` and their
210/// ancestors are more likely outputted in the first column. For a typical
211/// vertical graph rendering algorithm, `priorities` is usually set to the
212/// head of the main branch.
213pub fn beautify_graph(parents: &[Vec<usize>], priorities: &[usize]) -> Vec<usize> {
214    let n = parents.len();
215    let mut children_list: Vec<Vec<usize>> = (0..n).map(|_| Vec::new()).collect();
216    let mut weight: Vec<isize> = vec![1; n];
217    let mut roots: Vec<usize> = Vec::new();
218
219    // Consider user-provided additional_weight.
220    for &i in priorities {
221        if i < n {
222            weight[i] = n as isize;
223        }
224    }
225
226    // Populate children_list.
227    for (i, ps) in parents.iter().enumerate().rev() {
228        for &p in ps {
229            // i has parent p
230            if p < n {
231                children_list[p].push(i);
232                weight[p] = weight[p].saturating_add(weight[i]);
233            }
234        }
235        if ps.is_empty() {
236            roots.push(i);
237        }
238    }
239
240    // Sort children by weight.
241    for children in children_list.iter_mut() {
242        children.sort_unstable_by_key(|&c| (weight[c], c));
243    }
244    roots.sort_unstable_by_key(|&c| (-weight[c], c));
245    drop(weight);
246
247    // DFS from roots to heads.
248    let mut remaining: Vec<usize> = parents.iter().map(|ps| ps.len()).collect();
249    let mut output: Vec<usize> = Vec::with_capacity(n);
250    let mut outputted: Vec<bool> = vec![false; n];
251    let mut to_visit: VecDeque<usize> = roots.into();
252    while let Some(id) = to_visit.pop_front() {
253        // Already outputted?
254        if outputted[id] {
255            continue;
256        }
257
258        // Need to wait for visiting other parents first?
259        if remaining[id] > 0 {
260            // Visit it later.
261            to_visit.push_back(id);
262            continue;
263        }
264
265        // Output this id.
266        output.push(id);
267        outputted[id] = true;
268
269        // Visit children next.
270        for &c in children_list[id].iter().rev() {
271            remaining[c] -= 1;
272            to_visit.push_front(c);
273        }
274    }
275
276    output.reverse();
277    output
278}
279
280#[cfg(test)]
281mod tests {
282    use super::*;
283
284    #[test]
285    fn test_break_parent_func_cycle() -> Result<()> {
286        let parent_func = |n: Vertex| -> Result<Vec<Vertex>> { Ok(vec![n, v("1"), v("2")]) };
287        let parent_func_no_cycle = break_parent_func_cycle(parent_func);
288        assert_eq!(parent_func_no_cycle(v("A"))?, vec![v("1"), v("2")]);
289        assert_eq!(parent_func_no_cycle(v("1"))?, vec![v("2")]);
290        assert_eq!(parent_func_no_cycle(v("2"))?, vec![]);
291        Ok(())
292    }
293
294    #[test]
295    fn test_break_parent_func_cycle_linear() -> Result<()> {
296        let parent_func = |n: Vertex| -> Result<Vec<Vertex>> {
297            let list = "0123456789".chars().map(v).collect::<Vec<_>>();
298            let parents = match list.iter().position(|x| x == &n) {
299                Some(i) if i > 0 => vec![list[i - 1].clone()],
300                _ => vec![],
301            };
302            Ok(parents)
303        };
304        let parent_func_no_cycle = break_parent_func_cycle(parent_func);
305        assert_eq!(parent_func_no_cycle(v("2"))?, vec![v("1")]);
306        assert_eq!(parent_func_no_cycle(v("9"))?, vec![v("8")]);
307        assert_eq!(parent_func_no_cycle(v("8"))?, vec![v("7")]);
308        assert_eq!(parent_func_no_cycle(v("1"))?, vec![v("0")]);
309        assert_eq!(parent_func_no_cycle(v("5"))?, vec![v("4")]);
310        assert_eq!(parent_func_no_cycle(v("6"))?, vec![v("5")]);
311        assert_eq!(parent_func_no_cycle(v("4"))?, vec![v("3")]);
312        assert_eq!(parent_func_no_cycle(v("0"))?, vec![]);
313        assert_eq!(parent_func_no_cycle(v("3"))?, vec![v("2")]);
314        assert_eq!(parent_func_no_cycle(v("7"))?, vec![v("6")]);
315        Ok(())
316    }
317
318    /// Quickly create a Vertex.
319    fn v(name: impl ToString) -> Vertex {
320        Vertex::copy_from(name.to_string().as_bytes())
321    }
322
323    #[test]
324    fn test_beautify_graph() {
325        let t = |text: &str| -> Vec<usize> {
326            let split: Vec<&str> = text.split('#').chain(std::iter::once("")).take(2).collect();
327            let parents = parents_from_drawdag(split[0]);
328            let priorities: Vec<usize> = split[1]
329                .split_whitespace()
330                .map(|s| s.parse::<usize>().unwrap())
331                .collect();
332            beautify_graph(&parents, &priorities)
333        };
334        assert_eq!(
335            t(r#"
336              0-2-4-5
337                   /
338                1-3"#),
339            [5, 3, 1, 4, 2, 0]
340        );
341        assert_eq!(
342            t(r#"
343              0-2-4-5
344               \   /
345                1-3"#),
346            [5, 4, 2, 3, 1, 0]
347        );
348        assert_eq!(
349            t(r#"
350              0-2-4-5
351               \
352                1-3"#),
353            [5, 4, 2, 3, 1, 0]
354        );
355        assert_eq!(
356            t(r#"
357                 0-1-3-5-7-9-11-12
358                        / \
359                   2-4-6   8-10"#),
360            [12, 11, 9, 10, 8, 7, 6, 4, 2, 5, 3, 1, 0]
361        );
362        assert_eq!(
363            t(r#"
364                   0-3-5-7-9-12
365                        / \
366                 1-2-4-6   8-10-11"#),
367            [11, 10, 8, 12, 9, 7, 5, 3, 0, 6, 4, 2, 1]
368        );
369
370        // Preserve (reversed) order for same-length branches.
371        // [0, 1, 2], 2 is before 1.
372        assert_eq!(
373            t(r#"
374                  0-1
375                   \
376                    2"#),
377            [2, 1, 0]
378        );
379        // [0, 1, 2], 1 is before 0.
380        assert_eq!(
381            t(r#"
382                  0-2
383                   /
384                  1"#),
385            [2, 1, 0]
386        );
387
388        // With manual 'priorities'
389        assert_eq!(
390            t(r#"
391              0-2-4-5
392                   /
393                1-3   # 4"#),
394            [5, 3, 1, 4, 2, 0]
395        );
396        assert_eq!(
397            t(r#"
398              0-2-4-5
399                   /
400                1-3   # 3"#),
401            [5, 4, 2, 0, 3, 1],
402        );
403
404        assert_eq!(
405            t(r#"
406              0-2-4-5
407               \
408                1-3   # 3"#),
409            [3, 1, 5, 4, 2, 0]
410        );
411        assert_eq!(
412            t(r#"
413              0-2-4-5
414               \
415                1-3   # 5"#),
416            [5, 4, 2, 3, 1, 0]
417        );
418    }
419
420    /// Convert drawdag ASCII to an integer parents array.
421    fn parents_from_drawdag(ascii: &str) -> Vec<Vec<usize>> {
422        let parents_map = drawdag::parse(ascii);
423        let mut parents_vec = Vec::new();
424        for i in 0usize.. {
425            let s = i.to_string();
426            let parents = match parents_map.get(&s) {
427                Some(parents) => parents,
428                None => break,
429            };
430            let parents: Vec<usize> = parents
431                .iter()
432                .map(|p| p.parse::<usize>().unwrap())
433                .collect();
434            parents_vec.push(parents);
435        }
436        parents_vec
437    }
438}