scm_bisect/
basic.rs

1//! Example implementations of basic search strategies as defined in [`search`].
2//! See [`BasicStrategyKind`] for the list.
3
4use std::collections::{HashMap, HashSet};
5use std::fmt::Debug;
6use std::hash::Hash;
7
8use indexmap::IndexMap;
9use tracing::instrument;
10
11use crate::search;
12
13/// Directed acyclic graph commonly used in source control.
14///
15/// Implementation of `Graph` that represents the common case of a directed
16/// acyclic graph in source control. You can implement this trait instead of
17/// `Graph` (as there is a blanket implementation for `Graph`) and also make use
18/// of `BasicStrategy`.
19pub trait BasicSourceControlGraph: Debug {
20    /// The type of nodes in the graph. This should be cheap to clone.
21    type Node: Clone + Debug + Hash + Eq + 'static;
22
23    /// An error type.
24    type Error: Debug + std::error::Error + 'static;
25
26    /// Get every node `X` in the graph such that `X == node` or there exists a
27    /// child of `X` that is an ancestor of `node`.
28    fn ancestors(&self, node: Self::Node) -> Result<HashSet<Self::Node>, Self::Error>;
29
30    /// Get the union of `ancestors(node)` for every node in `nodes`.
31    #[instrument]
32    fn ancestors_all(
33        &self,
34        nodes: HashSet<Self::Node>,
35    ) -> Result<HashSet<Self::Node>, Self::Error> {
36        let mut ancestors = HashSet::new();
37        for node in nodes {
38            ancestors.extend(self.ancestors(node)?);
39        }
40        Ok(ancestors)
41    }
42
43    /// Filter `nodes` to only include nodes that are not ancestors of any other
44    /// node in `nodes`.
45    fn ancestor_heads(
46        &self,
47        nodes: HashSet<Self::Node>,
48    ) -> Result<HashSet<Self::Node>, Self::Error> {
49        let node_to_ancestors: HashMap<Self::Node, HashSet<Self::Node>> = nodes
50            .iter()
51            .map(|node| Ok((node.clone(), self.ancestors(node.clone())?)))
52            .collect::<Result<_, _>>()?;
53        let heads: HashSet<Self::Node> = nodes
54            .into_iter()
55            .filter(|node| {
56                node_to_ancestors
57                    .iter()
58                    .filter_map(|(other_node, ancestors)| {
59                        if node == other_node {
60                            None
61                        } else {
62                            Some(ancestors)
63                        }
64                    })
65                    .all(|ancestors| !ancestors.contains(node))
66            })
67            .collect();
68        Ok(heads)
69    }
70
71    /// Get every node `X` in the graph such that `X == node` or there exists a
72    /// parent of `X` that is a descendant of `node`.
73    fn descendants(&self, node: Self::Node) -> Result<HashSet<Self::Node>, Self::Error>;
74
75    /// Filter `nodes` to only include nodes that are not descendants of any
76    /// other node in `nodes`.
77    fn descendant_roots(
78        &self,
79        nodes: HashSet<Self::Node>,
80    ) -> Result<HashSet<Self::Node>, Self::Error> {
81        let node_to_descendants: HashMap<Self::Node, HashSet<Self::Node>> = nodes
82            .iter()
83            .map(|node| Ok((node.clone(), self.descendants(node.clone())?)))
84            .collect::<Result<_, _>>()?;
85        let roots: HashSet<Self::Node> = nodes
86            .into_iter()
87            .filter(|node| {
88                node_to_descendants
89                    .iter()
90                    .filter_map(|(other_node, descendants)| {
91                        if node == other_node {
92                            None
93                        } else {
94                            Some(descendants)
95                        }
96                    })
97                    .all(|descendants| !descendants.contains(node))
98            })
99            .collect();
100        Ok(roots)
101    }
102
103    /// Get the union of `descendants(node)` for every node in `nodes`.
104    #[instrument]
105    fn descendants_all(
106        &self,
107        nodes: HashSet<Self::Node>,
108    ) -> Result<HashSet<Self::Node>, Self::Error> {
109        let mut descendants = HashSet::new();
110        for node in nodes {
111            descendants.extend(self.descendants(node)?);
112        }
113        Ok(descendants)
114    }
115}
116
117impl<T: BasicSourceControlGraph> search::Graph for T {
118    type Node = <Self as BasicSourceControlGraph>::Node;
119    type Error = <Self as BasicSourceControlGraph>::Error;
120
121    fn is_ancestor(
122        &self,
123        ancestor: Self::Node,
124        descendant: Self::Node,
125    ) -> Result<bool, Self::Error> {
126        let ancestors = self.ancestors(descendant)?;
127        Ok(ancestors.contains(&ancestor))
128    }
129
130    fn simplify_success_bounds(
131        &self,
132        nodes: HashSet<Self::Node>,
133    ) -> Result<HashSet<Self::Node>, Self::Error> {
134        self.ancestor_heads(nodes)
135    }
136
137    fn simplify_failure_bounds(
138        &self,
139        nodes: HashSet<Self::Node>,
140    ) -> Result<HashSet<Self::Node>, Self::Error> {
141        self.descendant_roots(nodes)
142    }
143}
144
145/// The possible strategies for searching the graph.
146#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
147pub enum BasicStrategyKind {
148    /// Search the nodes in the order that they were provided.
149    Linear,
150
151    /// Search the nodes in the reverse order that they were provided.
152    LinearReverse,
153
154    /// Conduct a binary search on the nodes by partitioning the nodes into two
155    /// groups of approximately equal size.
156    ///
157    /// TODO: Partitioning into groups of approximately equal size isn't
158    /// actually optimal for the DAG case. Really, we want to maximize the
159    /// information that we gain from each test. The `git bisect` algorithm at
160    /// <https://git-scm.com/docs/git-bisect-lk2009#_bisection_algorithm_discussed>
161    /// discusses a metric to find the best partition for the subgraph which
162    /// remains to be tested.
163    ///
164    /// See also `git-bisect`'s skip algorithm:
165    /// <https://git-scm.com/docs/git-bisect-lk2009#_skip_algorithm>. This does
166    /// *not* use the same skip algorithm, and instead uses a deterministic
167    /// approach. In order to solve the following problem:
168    ///
169    /// > sometimes the best bisection points all happened to be in an area
170    /// > where all the commits are untestable. And in this case the user was
171    /// > asked to test many untestable commits, which could be very inefficient.
172    ///
173    /// We instead consider the hypothetical case that the node is a success,
174    /// and yield further nodes as if it were a success, and then interleave
175    /// those nodes with the hypothetical failure case.
176    ///
177    /// Resources:
178    ///
179    /// - <https://git-scm.com/docs/git-bisect-lk2009#_bisection_algorithm_discussed>
180    /// - <https://byorgey.wordpress.com/2023/01/01/competitive-programming-in-haskell-better-binary-search/>
181    /// - <https://julesjacobs.com/notes/binarysearch/binarysearch.pdf>
182    Binary,
183}
184
185/// A set of basic search strategies defined by `BasicStrategyKind`.
186#[derive(Clone, Debug)]
187pub struct BasicStrategy {
188    strategy: BasicStrategyKind,
189}
190
191impl BasicStrategy {
192    /// Constructor.
193    pub fn new(strategy: BasicStrategyKind) -> Self {
194        Self { strategy }
195    }
196}
197
198impl<G: BasicSourceControlGraph> search::Strategy<G> for BasicStrategy {
199    type Error = G::Error;
200
201    fn midpoint(
202        &self,
203        graph: &G,
204        bounds: &search::Bounds<G::Node>,
205        statuses: &IndexMap<G::Node, search::Status>,
206    ) -> Result<Option<G::Node>, G::Error> {
207        let search::Bounds {
208            success: success_bounds,
209            failure: failure_bounds,
210        } = bounds;
211        let mut nodes_to_search = {
212            let implied_success_nodes = graph.ancestors_all(success_bounds.clone())?;
213            let implied_failure_nodes = graph.descendants_all(failure_bounds.clone())?;
214            statuses
215                .iter()
216                .filter_map(|(node, status)| match status {
217                    search::Status::Untested => Some(node.clone()),
218                    search::Status::Success
219                    | search::Status::Failure
220                    | search::Status::Indeterminate => None,
221                })
222                .filter(|node| {
223                    !implied_success_nodes.contains(node) && !implied_failure_nodes.contains(node)
224                })
225                .collect::<Vec<_>>()
226        };
227        let next_to_search: Option<G::Node> = match self.strategy {
228            BasicStrategyKind::Linear => nodes_to_search.into_iter().next(),
229            BasicStrategyKind::LinearReverse => nodes_to_search.into_iter().next_back(),
230            BasicStrategyKind::Binary => {
231                let middle_index = nodes_to_search.len() / 2;
232                if middle_index < nodes_to_search.len() {
233                    Some(nodes_to_search.swap_remove(middle_index))
234                } else {
235                    None
236                }
237            }
238        };
239        Ok(next_to_search)
240    }
241}
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246
247    use crate::search::Bounds;
248    use crate::search::EagerSolution;
249    use crate::search::Search;
250    use crate::search::Status;
251    use crate::testing::arb_strategy;
252    use crate::testing::arb_test_graph_and_nodes;
253    use crate::testing::TestGraph;
254    use crate::testing::UsizeGraph;
255
256    use itertools::Itertools;
257    use maplit::hashmap;
258    use maplit::hashset;
259
260    #[test]
261    fn test_search_stick() {
262        let graph = UsizeGraph { max: 7 };
263        let nodes = 0..graph.max;
264        let linear_strategy = BasicStrategy {
265            strategy: BasicStrategyKind::Linear,
266        };
267        let linear_reverse_strategy = BasicStrategy {
268            strategy: BasicStrategyKind::LinearReverse,
269        };
270        let binary_strategy = BasicStrategy {
271            strategy: BasicStrategyKind::Binary,
272        };
273        let mut search = Search::new(graph.clone(), nodes.clone());
274
275        assert_eq!(
276            search
277                .search(&linear_strategy)
278                .unwrap()
279                .into_eager()
280                .unwrap(),
281            EagerSolution {
282                bounds: Default::default(),
283                next_to_search: vec![0, 1, 2, 3, 4, 5, 6],
284            }
285        );
286        assert_eq!(
287            search
288                .search(&linear_reverse_strategy)
289                .unwrap()
290                .into_eager()
291                .unwrap(),
292            EagerSolution {
293                bounds: Default::default(),
294                next_to_search: vec![6, 5, 4, 3, 2, 1, 0],
295            }
296        );
297        assert_eq!(
298            search
299                .search(&binary_strategy)
300                .unwrap()
301                .into_eager()
302                .unwrap(),
303            EagerSolution {
304                bounds: Default::default(),
305                // Breadth-first search:
306                // 0 1 2 3 4 5 6
307                //       ^
308                //   ^
309                //           ^
310                // ^
311                //     ^
312                //         ^
313                //             ^
314                next_to_search: vec![3, 1, 5, 0, 2, 4, 6],
315            }
316        );
317
318        search.notify(2, Status::Success).unwrap();
319        assert_eq!(
320            search
321                .search(&linear_strategy)
322                .unwrap()
323                .into_eager()
324                .unwrap(),
325            EagerSolution {
326                bounds: Bounds {
327                    success: hashset! {2},
328                    failure: hashset! {},
329                },
330                next_to_search: vec![3, 4, 5, 6],
331            }
332        );
333        assert_eq!(
334            search
335                .search(&binary_strategy)
336                .unwrap()
337                .into_eager()
338                .unwrap(),
339            EagerSolution {
340                bounds: Bounds {
341                    success: hashset! {2},
342                    failure: hashset! {},
343                },
344                next_to_search: vec![5, 4, 6, 3],
345            }
346        );
347
348        search.notify(5, Status::Failure).unwrap();
349        assert_eq!(
350            search
351                .search(&linear_strategy)
352                .unwrap()
353                .into_eager()
354                .unwrap(),
355            EagerSolution {
356                bounds: Bounds {
357                    success: hashset! {2},
358                    failure: hashset! {5},
359                },
360                next_to_search: vec![3, 4],
361            }
362        );
363        assert_eq!(
364            search
365                .search(&binary_strategy)
366                .unwrap()
367                .into_eager()
368                .unwrap(),
369            EagerSolution {
370                bounds: Bounds {
371                    success: hashset! {2},
372                    failure: hashset! {5},
373                },
374                next_to_search: vec![4, 3],
375            }
376        );
377
378        search.notify(3, Status::Indeterminate).unwrap();
379        assert_eq!(
380            search
381                .search(&binary_strategy)
382                .unwrap()
383                .into_eager()
384                .unwrap(),
385            EagerSolution {
386                bounds: Bounds {
387                    success: hashset! {2},
388                    failure: hashset! {5},
389                },
390                next_to_search: vec![4],
391            }
392        );
393    }
394
395    #[test]
396    fn test_search_inconsistent_notify() {
397        let graph = UsizeGraph { max: 7 };
398        let nodes = 0..graph.max;
399        let mut search = Search::new(graph, nodes);
400
401        search.notify(4, Status::Success).unwrap();
402        insta::assert_debug_snapshot!(search.notify(3, Status::Failure), @r###"
403        Err(
404            InconsistentStateTransition {
405                ancestor_node: 3,
406                ancestor_status: Failure,
407                descendant_node: 4,
408                descendant_status: Success,
409            },
410        )
411        "###);
412
413        insta::assert_debug_snapshot!(search.notify(4, Status::Indeterminate), @r###"
414        Err(
415            IllegalStateTransition {
416                node: 4,
417                from: Success,
418                to: Indeterminate,
419            },
420        )
421        "###);
422
423        search.notify(5, Status::Failure).unwrap();
424        insta::assert_debug_snapshot!(search.notify(6, Status::Success), @r###"
425        Err(
426            InconsistentStateTransition {
427                ancestor_node: 5,
428                ancestor_status: Failure,
429                descendant_node: 6,
430                descendant_status: Success,
431            },
432        )
433        "###);
434    }
435
436    #[test]
437    fn test_search_dag() {
438        let graph = TestGraph {
439            // a -> b -> e -> f -> g
440            // c -> d ->   -> h
441            nodes: hashmap! {
442                'a' => hashset! {'b'},
443                'b' => hashset! {'e'},
444                'c' => hashset! {'d'},
445                'd' => hashset! {'e'},
446                'e' => hashset! {'f', 'h'},
447                'f' => hashset! {'g'},
448                'g' => hashset! {},
449                'h' => hashset! {},
450            },
451        };
452        let linear_strategy = BasicStrategy {
453            strategy: BasicStrategyKind::Linear,
454        };
455        assert_eq!(graph.descendants('e'), Ok(hashset! {'e', 'f', 'g', 'h'}));
456        assert_eq!(graph.ancestors('e'), Ok(hashset! {'a', 'b', 'c', 'd', 'e'}));
457
458        let mut search = Search::new(graph, 'a'..='h');
459        assert_eq!(
460            search
461                .search(&linear_strategy)
462                .unwrap()
463                .into_eager()
464                .unwrap(),
465            EagerSolution {
466                bounds: Default::default(),
467                // Breadth-first search: we start from the roots of the graph in
468                // parallel and proceed to the heads of the graph.
469                next_to_search: vec!['a', 'c', 'b', 'd', 'e', 'f', 'h', 'g'],
470            }
471        );
472
473        search.notify('b', Status::Success).unwrap();
474        search.notify('g', Status::Failure).unwrap();
475        assert_eq!(
476            search
477                .search(&linear_strategy)
478                .unwrap()
479                .into_eager()
480                .unwrap(),
481            EagerSolution {
482                bounds: Bounds {
483                    success: hashset! {'b'},
484                    failure: hashset! {'g'},
485                },
486                next_to_search: vec!['c', 'd', 'e', 'f', 'h'],
487            }
488        );
489
490        search.notify('e', Status::Success).unwrap();
491        assert_eq!(
492            search
493                .search(&linear_strategy)
494                .unwrap()
495                .into_eager()
496                .unwrap(),
497            EagerSolution {
498                bounds: Bounds {
499                    success: hashset! {'e'},
500                    failure: hashset! {'g'},
501                },
502                next_to_search: vec!['f', 'h'],
503            }
504        );
505
506        search.notify('f', Status::Success).unwrap();
507        assert_eq!(
508            search
509                .search(&linear_strategy)
510                .unwrap()
511                .into_eager()
512                .unwrap(),
513            EagerSolution {
514                bounds: Bounds {
515                    success: hashset! {'f'},
516                    failure: hashset! {'g'},
517                },
518                next_to_search: vec!['h'],
519            }
520        );
521
522        search.notify('h', Status::Success).unwrap();
523        assert_eq!(
524            search
525                .search(&linear_strategy)
526                .unwrap()
527                .into_eager()
528                .unwrap(),
529            EagerSolution {
530                bounds: Bounds {
531                    success: hashset! {'f', 'h'},
532                    failure: hashset! {'g'},
533                },
534                next_to_search: vec![],
535            }
536        );
537    }
538
539    proptest::proptest! {
540        #[test]
541        fn test_search_dag_proptest(strategy in arb_strategy(), (graph, failure_nodes) in arb_test_graph_and_nodes()) {
542            let nodes = graph.nodes.keys().sorted().copied().collect::<Vec<_>>();
543            let strategy = BasicStrategy {
544                strategy,
545            };
546            let mut search = Search::new(graph.clone(), nodes);
547            let failure_nodes = graph.descendants_all(failure_nodes.into_iter().collect()).unwrap();
548
549            let solution = loop {
550                let solution = search.search(&strategy).unwrap().into_eager().unwrap();
551                let Bounds { success, failure } = &solution.bounds;
552                for success_node in success {
553                    assert!(!failure_nodes.contains(success_node))
554                }
555                for failure_node in failure {
556                    assert!(failure_nodes.contains(failure_node));
557                }
558                match solution.next_to_search.first() {
559                    Some(node) => {
560                        search.notify(*node, if failure_nodes.contains(node) {
561                            Status::Failure
562                        } else {
563                            Status::Success
564                        }).unwrap();
565                    }
566                    None => break solution,
567                }
568            };
569
570            let nodes = graph.nodes.keys().copied().collect::<HashSet<_>>();
571            assert!(solution.bounds.success.is_subset(&nodes));
572            assert!(solution.bounds.failure.is_subset(&nodes));
573            assert!(solution.bounds.success.is_disjoint(&solution.bounds.failure));
574            let all_success_nodes = graph.ancestors_all(solution.bounds.success.clone()).unwrap();
575            let all_failure_nodes = graph.descendants_all(solution.bounds.failure).unwrap();
576            assert!(all_success_nodes.is_disjoint(&all_failure_nodes));
577            assert!(
578                all_success_nodes.union(&all_failure_nodes).copied().collect::<HashSet<_>>() == nodes,
579                "all_success_nodes: {all_success_nodes:?}, all_failure_nodes: {all_failure_nodes:?}, nodes: {nodes:?}",
580            );
581        }
582    }
583}