graphfind_rs/pattern_matching/
vf_algorithms.rs

1use std::{
2    cmp::Ordering,
3    collections::{HashMap, HashSet},
4    fmt::Debug,
5    hash::Hash,
6};
7
8use bimap::BiHashMap;
9
10use crate::filter_map::FilterMap;
11use crate::{
12    graph::{incoming_nodes, outgoing_nodes, Graph},
13    pattern_matching::{MatchedGraph, PatternElement, PatternGraph, SubgraphAlgorithm},
14};
15
16/// Implements an subgraph isomorphism algorithm based on the papers
17/// "A (Sub)Graph Isomorphism Algorithm for Matching Large Graphs"
18/// by Cordella, Foggia, Sansone, and Vento, published in 2004
19/// (doi 10.1109/TPAMI.2004.75) as well as
20/// "Performance Evaluation of the VF Graph Matching Algorithm"
21/// by the same authors in 1999 (doi 10.1109/ICIAP.1999.797762).
22/// The paper referenced above call this algorithm VF2 respectively VF.
23
24/// VfState defines the required data structures as defined in Subsection 2.4
25/// of the 2004 paper, as well as the algorithms to run them.
26pub struct VfState<
27    'a,
28    NodeWeight,
29    EdgeWeight,
30    NRef,
31    ERef,
32    N2Ref,
33    E2Ref,
34    P: PatternGraph<NodeWeight, EdgeWeight, NodeRef = NRef, EdgeRef = ERef>,
35    B: Graph<NodeWeight, EdgeWeight, NodeRef = N2Ref, EdgeRef = E2Ref>,
36> where
37    NRef: Debug,
38    ERef: Debug,
39    N2Ref: Debug,
40    E2Ref: Debug,
41{
42    /// Reference to the pattern graph.
43    pattern_graph: &'a P,
44    /// Reference to the base graph.
45    base_graph: &'a B,
46    /// Vec of found graphs we may return.
47    results: Vec<MatchedGraph<'a, NodeWeight, EdgeWeight, P>>,
48
49    /// Matching of nodes in `pattern_graph` to suitable nodes in `base_graph`.
50    /// `core[n] = m` says that the node `n` can be matched to node `m`.
51    ///
52    /// We use this map to find possible candidates for next nodes, and to
53    /// construct the result graph.
54    core: BiHashMap<NRef, N2Ref>,
55    /// `out_1` is a matching between outgoing node references from `pattern_graph`,
56    /// and the search depth at which they were inserted. We use this mapping
57    /// to find possible successor nodes to insert into `core_1`.
58    out_1: HashMap<NRef, usize>,
59    /// Matching for outgoing nodes of `base_graph`. Analog Definition to `out_1`.
60    out_2: HashMap<N2Ref, usize>,
61    /// `in_1` maps nodes from `core_1` and their predecessors to the search depth
62    /// at which they were inserted. We use this mapping to find possible predecessors
63    /// of matched nodes to insert into `core_1`.
64    in_1: HashMap<NRef, usize>,
65    /// Matching for incoming nodes of `pattern_graph`. Analog Definition to `in_1`.
66    in_2: HashMap<N2Ref, usize>,
67
68    /// Counter for how many nodes we actually need to return.
69    nodes_to_take: usize,
70}
71
72/// Implementation of VfState/the VF2 Algorithm.
73/// This contains the actual parts related to subgraph isomorphism.
74impl<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B>
75    VfState<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B>
76where
77    NRef: Copy + Hash + Ord + Debug,
78    N2Ref: Copy + Hash + Eq + Debug,
79    ERef: Copy + Eq + Hash + Debug,
80    E2Ref: Copy + Debug,
81    P: PatternGraph<NodeWeight, EdgeWeight, NodeRef = NRef, EdgeRef = ERef>,
82    B: Graph<NodeWeight, EdgeWeight, NodeRef = N2Ref, EdgeRef = E2Ref>,
83{
84    /// Gives an ordering of two nodes, n1 and n2, from `pattern_graph`.
85    /// This ordering ensures that:
86    ///
87    /// 1. We process nodes in the result before ignored ones.
88    /// 2. We follow the given ordering of the node indices.
89    ///
90    /// We use this method to ensure set semantics.
91    fn give_node_order(&self, n1: NRef, n2: NRef) -> Ordering {
92        let n1_appears = self.pattern_graph.node_weight(n1).should_appear();
93        let n2_appears = self.pattern_graph.node_weight(n2).should_appear();
94        if n1_appears && !n2_appears {
95            Ordering::Less
96        } else if !n1_appears && n2_appears {
97            Ordering::Greater
98        } else {
99            n1.cmp(&n2)
100        }
101    }
102
103    /// Returns a tuple (N, N2) of node references.
104    /// N contains the smallest unmatched node within the pattern graph,
105    /// and N2 unmatched nodes within the base graph.
106    /// When matched nodes contain a successor, we use another method.
107    ///
108    /// This ordering is described in the 1999 first paper.
109    fn find_unmatched_unconnected_nodes(&'a self) -> (Option<NRef>, Vec<N2Ref>) {
110        let n = self
111            .pattern_graph
112            .nodes()
113            .filter(|n| !self.core.contains_left(n))
114            .min_by(|n1, n2| self.give_node_order(*n1, *n2));
115
116        let base_nodes: Vec<_> = self
117            .base_graph
118            .nodes()
119            .filter(|n| !self.core.contains_right(n))
120            .collect();
121
122        (n, base_nodes)
123    }
124
125    /// General implementation of the unmatched neighbors methods.
126    /// `pattern_map` and `base_map` contain the in/out sets of matched nodes,
127    /// and their forward/backward neighbors.
128    ///
129    /// The result, (N, N2), is the smallest unmatched neighbor node reference in `pattern_graph`,
130    /// if it exists, and the unmatched neighbor node references in `base_graph`.
131    ///
132    /// When `N` is an ignored node and we are still looking for nodes to add to the result,
133    /// `N` will be None so that the algorithm enforces set semantics for the results.
134    fn find_unmatched_neighbors(
135        &'a self,
136        pattern_map: &HashMap<NRef, usize>,
137        base_map: &HashMap<N2Ref, usize>,
138        find_ignored: bool,
139    ) -> (Option<NRef>, Vec<N2Ref>) {
140        // From pattern_map, i.e. neighbors of matched nodes in pattern_graph,
141        // only select those where no entry is in core.
142        let n = pattern_map
143            .keys()
144            .filter(|n_out| !self.core.contains_left(n_out))
145            .min_by(|n1, n2| self.give_node_order(**n1, **n2))
146            .filter(|n| find_ignored || self.pattern_graph.node_weight(**n).should_appear())
147            .cloned();
148
149        let n2: Vec<_> = base_map
150            .keys()
151            .filter(|m_out| !self.core.contains_right(m_out))
152            .cloned()
153            .collect();
154        (n, n2)
155    }
156
157    /// Matches node n to node m, where n is from the pattern, and m is from the base graph.
158    /// Update out_1/out_2/in_1/in_2 to hold the insertion depths.
159    fn assign(&mut self, n: NRef, m: N2Ref, depth: usize) {
160        // Update core/out/in sets.
161        self.core.insert(n, m);
162        self.out_1.entry(n).or_insert(depth);
163        self.out_2.entry(m).or_insert(depth);
164        self.in_1.entry(n).or_insert(depth);
165        self.in_2.entry(m).or_insert(depth);
166
167        // Iterate over the neighbors of n, and add them to the out_1 set/map.
168        outgoing_nodes(self.pattern_graph, n).for_each(|n_out| {
169            self.out_1.entry(n_out).or_insert(depth);
170        });
171        // Repeat the process for the outgoing neighbors of m.
172        outgoing_nodes(self.base_graph, m).for_each(|m_out| {
173            self.out_2.entry(m_out).or_insert(depth);
174        });
175        // Iterate for the predecessors of n and add them to in_1.
176        incoming_nodes(self.pattern_graph, n).for_each(|n_in| {
177            self.in_1.entry(n_in).or_insert(depth);
178        });
179        // Repeat for in_2 and predecessors of m.
180        incoming_nodes(self.base_graph, m).for_each(|m_in| {
181            self.in_2.entry(m_in).or_insert(depth);
182        });
183    }
184
185    /// Tests if matching node n to node m is allowed. This is a shorthand for
186    /// these conditions:
187    ///
188    /// ### Syntactic:
189    /// 1. `check_predecessor_relation`
190    /// 2. `check_successor_relation`
191    ///
192    /// ### Semantic:
193    /// 1. `check_node_semantics`
194    /// 2. `check_edge_semantics`
195    fn is_valid_matching(&self, n: NRef, m: N2Ref) -> bool {
196        self.check_node_semantics(n, m)
197            && self.check_predecessor_relation(n, m)
198            && self.check_successor_relation(n, m)
199            && self.check_edge_semantics(n, m)
200    }
201
202    /// Test that assigning n to m leaves the predecessor relations intact:
203    /// We may map any matched predecessor n' of n in `pattern_graph` to
204    /// another matched node m' that precedes m in `base_graph`.
205    fn check_predecessor_relation(&self, n: NRef, m: N2Ref) -> bool {
206        // M_1(s) intersected with Pred(G_1, n)
207        let n_preds: HashSet<_> = incoming_nodes(self.pattern_graph, n)
208            .filter(|n_pred| self.core.contains_left(n_pred))
209            .collect();
210        // M_2(s) intersected with Pred(G_2, m).
211        let m_preds: HashSet<_> = incoming_nodes(self.base_graph, m)
212            .filter(|m_pred| self.core.contains_right(m_pred))
213            .collect();
214
215        // Map every node n2 of n_preds to a predecessor m2 of m.
216        // Also map every node m2 of m_preds to a predecessor n2 of n.
217        n_preds.iter().all(|n2| {
218            self.core
219                .get_by_left(n2)
220                .is_some_and(|m2| m_preds.contains(m2))
221        })
222    }
223
224    /// Test that assigning n to m leaves the successor relations intact:
225    /// We may map any matched successor n' of n in `pattern_graph` to
226    /// another matched node m' that succeeds m in `base_graph`.
227    fn check_successor_relation(&self, n: NRef, m: N2Ref) -> bool {
228        // M_1(s) intersected with Succ(G_1, n)
229        let n_succs: HashSet<_> = outgoing_nodes(self.pattern_graph, n)
230            .filter(|n_succ| self.core.contains_left(n_succ))
231            .collect();
232        // M_2(s) intersected with Succ(G_2, m).
233        let m_succs: HashSet<_> = outgoing_nodes(self.base_graph, m)
234            .filter(|m_succ| self.core.contains_right(m_succ))
235            .collect();
236
237        // n2 should be mapped to another node m2, and that node is a successor of m.
238        n_succs.iter().all(|n2| {
239            self.core
240                .get_by_left(n2)
241                .is_some_and(|m2| m_succs.contains(m2))
242        })
243    }
244
245    /// Test whether node n in the pattern may be matched to node m
246    /// in the graph. That means that the matcher function for n
247    /// must return true for the node referred to by m.
248    fn check_node_semantics(&self, n: NRef, m: N2Ref) -> bool {
249        let matcher = self.pattern_graph.node_weight(n);
250        let refed_node = self.base_graph.node_weight(m);
251        matcher.may_match(refed_node)
252    }
253
254    /// Consider all edges e that lead to and from n. Take those edges for
255    /// which we already established a matching to another node m.
256    fn check_edge_semantics(&self, n: NRef, m: N2Ref) -> bool {
257        // Take successor edges of n that have been matched.
258        let n_succs_matched = self
259            .pattern_graph
260            .outgoing_edges(n)
261            .map(|e| (self.pattern_graph.adjacent_nodes(e).1, e))
262            .filter(|(n_succ, _)| self.core.contains_left(n_succ));
263
264        // Map successor edges of m to their outgoing nodes.
265        let m_succs_matched: HashMap<N2Ref, E2Ref> = self
266            .base_graph
267            .outgoing_edges(m)
268            .map(|e| (self.base_graph.adjacent_nodes(e).1, e))
269            .filter(|(m_succ, _)| self.core.contains_right(m_succ))
270            .collect();
271
272        // Map successor edges.
273        let n_m_succ_edges = n_succs_matched
274            .map(|(n_succ, e)| (e, m_succs_matched[self.core.get_by_left(&n_succ).unwrap()]));
275
276        // Take predecessor edges of n that have been matched.
277        let n_preds_matched = self
278            .pattern_graph
279            .incoming_edges(n)
280            .map(|e| (self.pattern_graph.adjacent_nodes(e).0, e))
281            .filter(|(n_pred, _)| self.core.contains_left(n_pred));
282
283        // Map predecessor edges of m to their incoming nodes.
284        let m_preds_matched: HashMap<N2Ref, E2Ref> = self
285            .base_graph
286            .incoming_edges(m)
287            .map(|e| (self.base_graph.adjacent_nodes(e).0, e))
288            .filter(|(m_pred, _)| self.core.contains_right(m_pred))
289            .collect();
290
291        // Map predecessor edges.
292        let n_m_pred_edges = n_preds_matched
293            .map(|(n_pred, e)| (e, m_preds_matched[self.core.get_by_left(&n_pred).unwrap()]));
294
295        // All successor edges in base_graph conform to the specification in pattern_graph,
296        // and so do the predecessors.
297        n_m_pred_edges.chain(n_m_succ_edges).all(|(e, e2)| {
298            let matcher = self.pattern_graph.edge_weight(e);
299            let matched = self.base_graph.edge_weight(e2);
300            matcher.may_match(matched)
301        })
302    }
303
304    /// Undoes the matching between nodes n and m.
305    fn unassign(&mut self, n: &NRef, m: &N2Ref, depth: usize) {
306        // Remove from core set
307        self.core.remove_by_left(n);
308        // Remove from out/in sets + neighbors.
309        Self::remove(n, depth, &mut self.out_1);
310        Self::remove(m, depth, &mut self.out_2);
311        Self::remove(n, depth, &mut self.in_1);
312        Self::remove(m, depth, &mut self.in_2);
313
314        // out_1/Pattern Graph
315        outgoing_nodes(self.pattern_graph, *n)
316            .for_each(|n_out| Self::remove(&n_out, depth, &mut self.out_1));
317        // out_2/Base Graph
318        outgoing_nodes(self.base_graph, *m)
319            .for_each(|m_out| Self::remove(&m_out, depth, &mut self.out_2));
320        // in_1/Pattern Graph
321        incoming_nodes(self.pattern_graph, *n)
322            .for_each(|n_in| Self::remove(&n_in, depth, &mut self.in_1));
323        // in_2/Base Graph
324        incoming_nodes(self.base_graph, *m)
325            .for_each(|n_in| Self::remove(&n_in, depth, &mut self.in_2));
326    }
327
328    /// Removes index from map if its insertion depth is equal to
329    /// its insertion depth. Removes thus nodes from the out set.
330    fn remove<N>(index: &N, depth: usize, map: &mut HashMap<N, usize>)
331    where
332        N: Eq + Hash,
333    {
334        if let Some(insert_depth) = map.get(index) {
335            if insert_depth == &depth {
336                map.remove(index);
337            }
338        }
339    }
340
341    /// Produces a new ResultGraph for the current graph state.
342    ///
343    /// Copy the keys from pattern_graph along with the weights referred
344    /// to by the depths from base_graph. Note that any elements in the result graph that
345    /// are marked as ignored, will not appear in the result.
346    fn produce_graph(&mut self) {
347        // Get node references and weights.
348        let node_list = self
349            .core
350            .iter()
351            .filter(|(n, _)| self.pattern_graph.node_weight(**n).should_appear())
352            .map(|(n, m)| (*n, self.base_graph.node_weight(*m)))
353            .collect();
354
355        // Mutable Edge List.
356        let mut edge_list = HashMap::new();
357        // Find outgoing nodes (E, E2) of each matching and matched node pair (n, m).
358        // Match each edge e from E to another e2 from E2 based on their matched successors,
359        // then e to the weight associated with e2.
360        for (n, m) in &self.core {
361            let n_succs = self
362                .pattern_graph
363                .outgoing_edges(*n)
364                .filter(|e| self.pattern_graph.edge_weight(*e).should_appear())
365                .map(|e| (self.pattern_graph.adjacent_nodes(e).1, e));
366            let m_succs: HashMap<_, _> = self
367                .base_graph
368                .outgoing_edges(*m)
369                .map(|e2| (self.base_graph.adjacent_nodes(e2).1, e2))
370                .collect();
371            n_succs
372                .map(|(n_succ, e)| (e, m_succs[self.core.get_by_left(&n_succ).unwrap()]))
373                .map(|(e, e2)| (e, self.base_graph.edge_weight(e2)))
374                .for_each(|(e_ref, edge_weight)| {
375                    edge_list.insert(e_ref, edge_weight);
376                });
377        }
378
379        let result = FilterMap::new(self.pattern_graph, node_list, edge_list);
380        self.results.push(result);
381    }
382
383    /// Looks up subgraphs and puts them into results.
384    ///
385    /// Returns the node number to go back to. Thus prevents
386    /// duplicate matches when we have elements that we ignore.
387    fn find_subgraphs(&mut self, depth: usize) -> usize {
388        // Full match may now be added.
389        if depth == self.pattern_graph.count_nodes() {
390            self.produce_graph();
391            self.nodes_to_take
392        } else {
393            let find_ignored = depth >= self.nodes_to_take;
394            // Find unmatched nodes that are outgoing neighbors of matched nodes.
395            let (mut pat_node, mut base_nodes) =
396                self.find_unmatched_neighbors(&self.out_1, &self.out_2, find_ignored);
397            // Failing that, try incoming neighbors.
398            if pat_node.is_none() || base_nodes.is_empty() {
399                (pat_node, base_nodes) =
400                    self.find_unmatched_neighbors(&self.in_1, &self.in_2, find_ignored);
401            }
402            // Failing that also, try unmatched and unconnected nodes.
403            if pat_node.is_none() || base_nodes.is_empty() {
404                (pat_node, base_nodes) = self.find_unmatched_unconnected_nodes();
405            }
406
407            // Assert we always will have a node in the pattern.
408            let n = pat_node.unwrap();
409            for m in base_nodes {
410                self.assign(n, m, depth);
411                // Test compatibility.
412                if self.is_valid_matching(n, m) {
413                    // What node do we need to assign next /
414                    // do we need to go back?
415                    let next_node = self.find_subgraphs(depth + 1);
416                    if next_node == self.nodes_to_take && next_node <= depth {
417                        // Restore State early
418                        self.unassign(&n, &m, depth);
419                        return next_node;
420                    }
421                }
422                self.unassign(&n, &m, depth);
423            }
424            depth
425        }
426    }
427
428    /// Creates a new VfState for the given pattern graph and base graph.
429    /// Initialized for each base_graph instance, to use its specific indices.
430    ///
431    /// ## Input:
432    /// 1. `pattern_graph`, a PatternGraph with NRef node references.
433    /// 2. `base_graph`, any Graph with N2Ref node references.
434    ///
435    /// ## Output:
436    /// A VfState struct.
437    fn init(
438        pattern_graph: &'a P,
439        base_graph: &'a B,
440    ) -> VfState<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B> {
441        // Count the number of nodes to not ignore.
442        let nodes_to_take = pattern_graph
443            .nodes()
444            .filter(|n| pattern_graph.node_weight(*n).should_appear())
445            .count();
446
447        VfState {
448            pattern_graph,
449            base_graph,
450            results: vec![],
451            core: BiHashMap::new(),
452            out_1: HashMap::new(),
453            out_2: HashMap::new(),
454            in_1: HashMap::new(),
455            in_2: HashMap::new(),
456            nodes_to_take,
457        }
458    }
459
460    /// Handles empty patterns and otherwise calls the
461    /// predefined search function.
462    fn run_query(&mut self) {
463        // Check in advance that our pattern fits in the base graph.
464        if self.pattern_graph.is_empty_graph()
465            || self.pattern_graph.count_nodes() > self.base_graph.count_nodes()
466            || self.pattern_graph.count_edges() > self.base_graph.count_edges()
467        {
468            return;
469        }
470        let _ = self.find_subgraphs(0);
471    }
472}
473
474impl<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B>
475    SubgraphAlgorithm<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B>
476    for VfState<'a, NodeWeight, EdgeWeight, NRef, ERef, N2Ref, E2Ref, P, B>
477where
478    NRef: Copy + Hash + Ord + Debug,
479    N2Ref: Copy + Hash + Eq + Debug,
480    ERef: Copy + Hash + Eq + Debug,
481    E2Ref: Copy + Debug,
482    P: PatternGraph<NodeWeight, EdgeWeight, NodeRef = NRef, EdgeRef = ERef>,
483    B: Graph<NodeWeight, EdgeWeight, NodeRef = N2Ref, EdgeRef = E2Ref>,
484{
485    fn eval(
486        pattern_graph: &'a P,
487        base_graph: &'a B,
488    ) -> Vec<
489        FilterMap<
490            'a,
491            PatternElement<NodeWeight>,
492            PatternElement<EdgeWeight>,
493            &'a NodeWeight,
494            &'a EdgeWeight,
495            P,
496        >,
497    > {
498        let mut vfstate = VfState::init(pattern_graph, base_graph);
499        vfstate.run_query();
500
501        // Move results out of vstate struct before dropping it.
502        std::mem::take(&mut vfstate.results)
503    }
504}