assembly_theory/enumerate.rs
1//! Enumerate connected, non-induced subgraphs of a molecular graph.
2//!
3//! Specifically, for a molecule with |E| edges, enumerate all connected,
4//! non-induced subgraphs with at most |E|/2 edges. Any larger subgraphs
5//! cannot be "duplicatable" (i.e., in a pair of non-overlapping, isomorphic
6//! subgraphs), so we don't need them.
7
8use std::collections::HashSet;
9
10use bit_set::BitSet;
11use clap::ValueEnum;
12use petgraph::graph::EdgeIndex;
13
14use crate::{molecule::Molecule, utils::edge_neighbors};
15
16/// Strategy for enumerating connected, non-induced subgraphs.
17#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
18pub enum EnumerateMode {
19 /// Grow connected subgraphs from each edge using iterative extension.
20 Extend,
21 /// From a subgraph, choose an edge from its boundary and either grow it by
22 /// adding this edge or erode its remainder by discarding the edge.
23 GrowErode,
24}
25
26/// Return an interator over all connected, non-induced subgraphs of the
27/// molecular graph `mol` using the algorithm specified by `mode`.
28pub fn enumerate_subgraphs(mol: &Molecule, mode: EnumerateMode) -> HashSet<BitSet> {
29 match mode {
30 EnumerateMode::Extend => extend(mol),
31 EnumerateMode::GrowErode => grow_erode(mol),
32 }
33}
34
35/// Enumerate connected, non-induced subgraphs with at most |E|/2 edges using
36/// a process of iterative extension starting from each individual edge.
37fn extend(mol: &Molecule) -> HashSet<BitSet> {
38 // Maintain a vector of sets of subgraphs at each level of the process,
39 // starting with all edges individually at the first level.
40 let mut subgraphs: Vec<HashSet<BitSet>> =
41 vec![HashSet::from_iter(mol.graph().edge_indices().map(|ix| {
42 let mut subgraph = BitSet::new();
43 subgraph.insert(ix.index());
44 subgraph
45 }))];
46
47 // At each level, collect and deduplicate all ways of extending subgraphs
48 // by one neighboring edge.
49 for level in 0..(mol.graph().edge_count() / 2) {
50 let mut extended_subgraphs = HashSet::new();
51 for subgraph in &subgraphs[level] {
52 // Find all "frontier" edges incident to this subgraph (contains
53 // both this subgraph's edges and its edge boundary).
54 let frontier =
55 BitSet::from_iter(subgraph.iter().flat_map(|i| {
56 edge_neighbors(mol.graph(), EdgeIndex::new(i)).map(|ix| ix.index())
57 }));
58
59 // Collect and deduplicate all subgraphs obtained by extending the
60 // current subgraph using one edge from its boundary.
61 for edge in frontier.difference(subgraph) {
62 let mut extended_subgraph = subgraph.clone();
63 extended_subgraph.insert(edge);
64 extended_subgraphs.insert(extended_subgraph);
65 }
66 }
67
68 subgraphs.push(extended_subgraphs);
69 }
70
71 // Return an iterator over subgraphs, skipping singleton edges.
72 subgraphs
73 .into_iter()
74 .skip(1)
75 .flatten()
76 .collect::<HashSet<_>>()
77}
78
79/// Enumerate connected, non-induced subgraphs with at most |E|/2 edges; at
80/// each step, choose one edge from the current subgraph's boundary and either
81/// add it to the subgraph or discard it from the remainder. See:
82/// - https://stackoverflow.com/a/15722579
83/// - https://stackoverflow.com/a/15658245
84fn grow_erode(mol: &Molecule) -> HashSet<BitSet> {
85 // Initialize the current subgraph and its "frontier" (i.e., the union of
86 // its edges and its edge boundary) as well as the "remainder", which is
87 // all edges not in the current subgraph.
88 let subgraph = BitSet::new();
89 let frontier = BitSet::new();
90 let remainder = BitSet::from_iter(mol.graph().edge_indices().map(|ix| ix.index()));
91
92 // Set up a set of subgraphs enumerated so far.
93 let mut subgraphs = HashSet::new();
94
95 // Maintain a stack of subgraph instances to extend.
96 let mut stack = vec![(subgraph, frontier, remainder)];
97 while let Some((mut subgraph, mut frontier, mut remainder)) = stack.pop() {
98 // Get the next edge from the subgraph's edge boundary or, if the
99 // subgraph is empty, from the remainder.
100 let candidate = if subgraph.is_empty() {
101 remainder.iter().next()
102 } else {
103 remainder.intersection(&frontier).next()
104 };
105
106 if let Some(e) = candidate {
107 // Make a new instance by discarding the candidate edge entirely.
108 remainder.remove(e);
109 stack.push((subgraph.clone(), frontier.clone(), remainder.clone()));
110
111 // Make another instance by adding the candidate edge to the
112 // subgraph and updating the frontier accordingly if the new
113 // subgraph was not previously enumerated and is not too large to
114 // be part of a non-overlapping isomorphic pair.
115 subgraph.insert(e);
116 if !subgraphs.contains(&subgraph) && subgraph.len() <= mol.graph().edge_count() / 2 {
117 frontier
118 .extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|ix| ix.index()));
119 stack.push((subgraph, frontier, remainder));
120 }
121 } else if subgraph.len() > 1 {
122 // When all candidate edges are exhausted, collect this subgraph
123 // unless it is just a singleton edge (basic unit).
124 subgraphs.insert(subgraph);
125 }
126 }
127
128 subgraphs
129}