assembly_theory/
enumerate.rs

1//! Enumerate connected, non-induced subgraphs of a molecular graph.
2//!
3//! Specifically, for a molecule with |E| edges, enumerate all connected,
4//! non-induced subgraphs with at most |E|/2 edges. Any larger subgraphs
5//! cannot be "duplicatable" (i.e., in a pair of non-overlapping, isomorphic
6//! subgraphs), so we don't need them.
7
8use std::collections::HashSet;
9
10use bit_set::BitSet;
11use clap::ValueEnum;
12use petgraph::graph::EdgeIndex;
13
14use crate::{molecule::Molecule, utils::edge_neighbors};
15
16/// Strategy for enumerating connected, non-induced subgraphs.
17#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
18pub enum EnumerateMode {
19    /// Grow connected subgraphs from each edge using iterative extension.
20    Extend,
21    /// From a subgraph, choose an edge from its boundary and either grow it by
22    /// adding this edge or erode its remainder by discarding the edge.
23    GrowErode,
24}
25
26/// Return an interator over all connected, non-induced subgraphs of the
27/// molecular graph `mol` using the algorithm specified by `mode`.
28pub fn enumerate_subgraphs(mol: &Molecule, mode: EnumerateMode) -> HashSet<BitSet> {
29    match mode {
30        EnumerateMode::Extend => extend(mol),
31        EnumerateMode::GrowErode => grow_erode(mol),
32    }
33}
34
35/// Enumerate connected, non-induced subgraphs with at most |E|/2 edges using
36/// a process of iterative extension starting from each individual edge.
37fn extend(mol: &Molecule) -> HashSet<BitSet> {
38    // Maintain a vector of sets of subgraphs at each level of the process,
39    // starting with all edges individually at the first level.
40    let mut subgraphs: Vec<HashSet<BitSet>> =
41        vec![HashSet::from_iter(mol.graph().edge_indices().map(|ix| {
42            let mut subgraph = BitSet::new();
43            subgraph.insert(ix.index());
44            subgraph
45        }))];
46
47    // At each level, collect and deduplicate all ways of extending subgraphs
48    // by one neighboring edge.
49    for level in 0..(mol.graph().edge_count() / 2) {
50        let mut extended_subgraphs = HashSet::new();
51        for subgraph in &subgraphs[level] {
52            // Find all "frontier" edges incident to this subgraph (contains
53            // both this subgraph's edges and its edge boundary).
54            let frontier =
55                BitSet::from_iter(subgraph.iter().flat_map(|i| {
56                    edge_neighbors(mol.graph(), EdgeIndex::new(i)).map(|ix| ix.index())
57                }));
58
59            // Collect and deduplicate all subgraphs obtained by extending the
60            // current subgraph using one edge from its boundary.
61            for edge in frontier.difference(subgraph) {
62                let mut extended_subgraph = subgraph.clone();
63                extended_subgraph.insert(edge);
64                extended_subgraphs.insert(extended_subgraph);
65            }
66        }
67
68        subgraphs.push(extended_subgraphs);
69    }
70
71    // Return an iterator over subgraphs, skipping singleton edges.
72    subgraphs
73        .into_iter()
74        .skip(1)
75        .flatten()
76        .collect::<HashSet<_>>()
77}
78
79/// Enumerate connected, non-induced subgraphs with at most |E|/2 edges; at
80/// each step, choose one edge from the current subgraph's boundary and either
81/// add it to the subgraph or discard it from the remainder. See:
82/// - https://stackoverflow.com/a/15722579
83/// - https://stackoverflow.com/a/15658245
84fn grow_erode(mol: &Molecule) -> HashSet<BitSet> {
85    // Initialize the current subgraph and its "frontier" (i.e., the union of
86    // its edges and its edge boundary) as well as the "remainder", which is
87    // all edges not in the current subgraph.
88    let subgraph = BitSet::new();
89    let frontier = BitSet::new();
90    let remainder = BitSet::from_iter(mol.graph().edge_indices().map(|ix| ix.index()));
91
92    // Set up a set of subgraphs enumerated so far.
93    let mut subgraphs = HashSet::new();
94
95    // Maintain a stack of subgraph instances to extend.
96    let mut stack = vec![(subgraph, frontier, remainder)];
97    while let Some((mut subgraph, mut frontier, mut remainder)) = stack.pop() {
98        // Get the next edge from the subgraph's edge boundary or, if the
99        // subgraph is empty, from the remainder.
100        let candidate = if subgraph.is_empty() {
101            remainder.iter().next()
102        } else {
103            remainder.intersection(&frontier).next()
104        };
105
106        if let Some(e) = candidate {
107            // Make a new instance by discarding the candidate edge entirely.
108            remainder.remove(e);
109            stack.push((subgraph.clone(), frontier.clone(), remainder.clone()));
110
111            // Make another instance by adding the candidate edge to the
112            // subgraph and updating the frontier accordingly if the new
113            // subgraph was not previously enumerated and is not too large to
114            // be part of a non-overlapping isomorphic pair.
115            subgraph.insert(e);
116            if !subgraphs.contains(&subgraph) && subgraph.len() <= mol.graph().edge_count() / 2 {
117                frontier
118                    .extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|ix| ix.index()));
119                stack.push((subgraph, frontier, remainder));
120            }
121        } else if subgraph.len() > 1 {
122            // When all candidate edges are exhausted, collect this subgraph
123            // unless it is just a singleton edge (basic unit).
124            subgraphs.insert(subgraph);
125        }
126    }
127
128    subgraphs
129}