graph_neighbor_matching/
similarity_matrix.rs

1use crate::{Edges, Graph, NodeColorMatching, ScoreNorm};
2use approx::relative_eq;
3use closed01::Closed01;
4use munkres::{solve_assignment, Position, WeightMatrix};
5use ndarray::{Array2, FoldWhile, Zip};
6use std::{cmp, mem};
7
8type Matrix = Array2<f32>;
9
10#[derive(Debug)]
11pub struct SimilarityMatrix<'a, F, G, E, N>
12where
13    F: NodeColorMatching<N>,
14    G: Graph<EDGE = E, NODE = N> + 'a,
15    E: Edges,
16    N: Clone,
17{
18    graph_a: &'a G,
19    graph_b: &'a G,
20    node_color_matching: F,
21    // current version of similarity matrix
22    current: Matrix,
23    // previous version of similarity matrix
24    previous: Matrix,
25    // current number of iterations
26    num_iterations: usize,
27}
28
29impl<'a, F, G, E, N> SimilarityMatrix<'a, F, G, E, N>
30where
31    F: NodeColorMatching<N>,
32    G: Graph<EDGE = E, NODE = N>,
33    E: Edges,
34    N: Clone,
35{
36    pub fn new(
37        graph_a: &'a G,
38        graph_b: &'a G,
39        node_color_matching: F,
40    ) -> SimilarityMatrix<'a, F, G, E, N> {
41        // `x` is the node-similarity matrix.
42        // we initialize `x`, so that x[i,j]=1 for all i in A.edges() and j in
43        // B.edges().
44        let shape = (graph_a.num_nodes(), graph_b.num_nodes());
45        let x = Matrix::from_shape_fn(shape, |(i, j)| {
46            if graph_a.node_degree(i) > 0 && graph_b.node_degree(j) > 0 {
47                // this is normally set to 1.0 (i.e. without node color matching).
48                node_color_matching
49                    .node_color_matching(graph_a.node_value(i), graph_b.node_value(j))
50            } else {
51                Closed01::zero()
52            }
53            .get()
54        });
55
56        let new_x = Matrix::from_elem(shape, Closed01::zero().get());
57
58        SimilarityMatrix {
59            graph_a,
60            graph_b,
61            node_color_matching,
62            current: x,
63            previous: new_x,
64            num_iterations: 0,
65        }
66    }
67
68    fn in_eps(&self, eps: f32) -> bool {
69        Zip::from(&self.previous)
70            .and(&self.current)
71            .fold_while(true, |all_prev_in_eps, x, y| {
72                if all_prev_in_eps && relative_eq!(x, y, epsilon = eps) {
73                    FoldWhile::Continue(true)
74                } else {
75                    FoldWhile::Done(false)
76                }
77            })
78            .into_inner()
79    }
80
81    /// Calculates the next iteration of the similarity matrix (x[k+1]).
82    pub fn next(&mut self) {
83        {
84            let x = &self.current;
85            for ((i, j), new_x_ij) in self.previous.indexed_iter_mut() {
86                let scale = self
87                    .node_color_matching
88                    .node_color_matching(self.graph_a.node_value(i), self.graph_b.node_value(j));
89                let in_score = s_next(self.graph_a.in_edges_of(i), self.graph_b.in_edges_of(j), x);
90                let out_score = s_next(
91                    self.graph_a.out_edges_of(i),
92                    self.graph_b.out_edges_of(j),
93                    x,
94                );
95                *new_x_ij = in_score.average(out_score).mul(scale).get();
96            }
97        }
98
99        mem::swap(&mut self.previous, &mut self.current);
100        self.num_iterations += 1;
101    }
102
103    /// Iteratively calculate the similarity matrix.
104    ///
105    /// `stop_after_iter`: Stop after iteration (Calculate x(stop_after_iter))
106    /// `eps`:   When to stop the iteration
107    pub fn iterate(&mut self, stop_after_iter: usize, eps: f32) {
108        for _ in 0..stop_after_iter {
109            if self.in_eps(eps) {
110                break;
111            }
112            self.next();
113        }
114    }
115
116    pub fn matrix(&self) -> &Matrix {
117        &self.current
118    }
119
120    pub fn num_iterations(&self) -> usize {
121        self.num_iterations
122    }
123
124    pub fn min_nodes(&self) -> usize {
125        cmp::min(self.current.nrows(), self.current.ncols())
126    }
127
128    pub fn max_nodes(&self) -> usize {
129        cmp::max(self.current.nrows(), self.current.ncols())
130    }
131
132    pub fn optimal_node_assignment(&self) -> Vec<Position> {
133        let n = self.min_nodes();
134        let assignment = if n > 0 {
135            let mut w = WeightMatrix::from_fn(n, |ij| similarity_cost(self.current[ij]));
136            solve_assignment(&mut w).unwrap()
137        } else {
138            Vec::new()
139        };
140        assert!(assignment.len() == n);
141        assignment
142    }
143
144    fn score_optimal_sum(&self, node_assignment: Option<&[Position]>) -> f32 {
145        match node_assignment {
146            Some(node_assignment) => {
147                assert!(node_assignment.len() == self.min_nodes());
148                node_assignment
149                    .iter()
150                    .fold(0.0, |acc, &Position { row, column }| {
151                        acc + self.current[(row, column)]
152                    })
153            }
154            None => {
155                let node_assignment = self.optimal_node_assignment();
156                assert!(node_assignment.len() == self.min_nodes());
157                node_assignment
158                    .iter()
159                    .fold(0.0, |acc, &Position { row, column }| {
160                        acc + self.current[(row, column)]
161                    })
162            }
163        }
164    }
165
166    /// Calculate a measure how good the edge weights match up.
167    ///
168    /// We start by calculating the optimal node assignment between nodes of graph A and graph B,
169    /// then compare all outgoing edges of similar-assigned nodes by again using an assignment
170    /// between the edge-weight differences of all edge pairs.
171    pub fn score_outgoing_edge_weights_sum_norm(
172        &self,
173        node_assignment: &[Position],
174        norm: ScoreNorm,
175    ) -> Closed01<f32> {
176        let n = self.min_nodes();
177        let m = self.max_nodes();
178        debug_assert!(m >= n);
179
180        assert!(node_assignment.len() == n);
181
182        // we sum up all edge weight scores
183        let sum: f32 = node_assignment.iter().fold(
184            0.0,
185            |acc,
186             &Position {
187                 row: node_i,
188                 column: node_j,
189             }| {
190                let score_ij = self.score_outgoing_edge_weights_of(node_i, node_j);
191                acc + score_ij.get()
192            },
193        );
194
195        assert!(sum >= 0.0 && sum <= n as f32);
196
197        match norm {
198            // Not penalize missing nodes.
199            ScoreNorm::MinDegree => Closed01::new(sum / n as f32),
200
201            // To penalize for missing nodes divide by the maximum number of nodes `m`.
202            ScoreNorm::MaxDegree => Closed01::new(sum / m as f32),
203        }
204    }
205
206    /// Calculate a similarity measure of outgoing edges of nodes `node_i` of graph A and `node_j`
207    /// of graph B.  A score of 1.0 means, the edges weights match up perfectly. 0.0 means, no
208    /// similarity.
209    fn score_outgoing_edge_weights_of(&self, node_i: usize, node_j: usize) -> Closed01<f32> {
210        let out_i = self.graph_a.out_edges_of(node_i);
211        let out_j = self.graph_b.out_edges_of(node_j);
212
213        let max_deg = cmp::max(out_i.num_edges(), out_j.num_edges());
214
215        if max_deg == 0 {
216            // Nodes with no edges are perfectly similar
217            return Closed01::one();
218        }
219
220        // Calculates the edge weight distance between edges i and j.
221        let edge_weight_distance = &|(i, j)| {
222            match (out_i.nth_edge_weight(i), out_j.nth_edge_weight(j)) {
223                (Some(w_i), Some(w_j)) => w_i.distance(w_j),
224                _ => {
225                    // Maximum penalty between two weighted edges
226                    // NOTE: missing edges could be penalized more, but we already
227                    // penalize for that in the node similarity measure.
228                    Closed01::one()
229                }
230            }
231            .get()
232        };
233
234        let mut w = WeightMatrix::from_fn(max_deg, edge_weight_distance);
235
236        // calculate optimal edge weight assignement.
237        let assignment = solve_assignment(&mut w).unwrap();
238        assert!(assignment.len() == max_deg);
239
240        // The sum is the sum of all weight differences on the optimal `path`.
241        // It's range is from 0.0 (perfect matching) to max_deg*1.0 (bad matching).
242        let sum: f32 = assignment
243            .iter()
244            .fold(0.0, |acc, &Position { row, column }| {
245                acc + edge_weight_distance((row, column))
246            });
247
248        debug_assert!(sum >= 0.0 && sum <= max_deg as f32);
249
250        // we "invert" the normalized sum so that 1.0 means perfect matching and 0.0
251        // no matching.
252        Closed01::new(sum / max_deg as f32).inv()
253    }
254
255    /// Sums the optimal assignment of the node similarities and normalizes (divides)
256    /// by the min/max degree of both graphs.
257    /// ScoreNorm::MinDegree is used as default in the paper.
258    pub fn score_optimal_sum_norm(
259        &self,
260        node_assignment: Option<&[Position]>,
261        norm: ScoreNorm,
262    ) -> Closed01<f32> {
263        let n = self.min_nodes();
264        let m = self.max_nodes();
265
266        if n > 0 {
267            assert!(m > 0);
268            let sum = self.score_optimal_sum(node_assignment);
269            assert!(sum >= 0.0 && sum <= n as f32);
270
271            match norm {
272                // Not penalize missing nodes.
273                ScoreNorm::MinDegree => Closed01::new(sum / n as f32),
274
275                // To penalize for missing nodes, divide by the maximum number of nodes `m`.
276                ScoreNorm::MaxDegree => Closed01::new(sum / m as f32),
277            }
278        } else {
279            Closed01::zero()
280        }
281    }
282
283    /// Calculates the average over the whole node similarity matrix. This is faster,
284    /// as no assignment has to be found. "Graphs with greater number of automorphisms
285    /// would be considered to be more self-similar than graphs without automorphisms."
286    pub fn score_average(&self) -> Closed01<f32> {
287        let n = self.min_nodes();
288        if n > 0 {
289            let sum: f32 = self.current.iter().fold(0.0, |acc, &v| acc + v);
290            let len = self.current.shape().len();
291            assert!(len > 0);
292            Closed01::new(sum / len as f32)
293        } else {
294            Closed01::zero()
295        }
296    }
297}
298
299/// Calculates the similarity of two nodes `i` and `j`.
300///
301/// `n_i` contains the neighborhood of i (either in or out neighbors, not both)
302/// `n_j` contains the neighborhood of j (either in or out neighbors, not both)
303/// `x`   the similarity matrix.
304fn s_next<T: Edges>(n_i: &T, n_j: &T, x: &Array2<f32>) -> Closed01<f32> {
305    let max_deg = cmp::max(n_i.num_edges(), n_j.num_edges());
306    let min_deg = cmp::min(n_i.num_edges(), n_j.num_edges());
307
308    debug_assert!(min_deg <= max_deg);
309
310    if max_deg == 0 {
311        debug_assert!(n_i.num_edges() == 0);
312        debug_assert!(n_j.num_edges() == 0);
313
314        // in the paper, 0/0 is defined as 1.0
315
316        // Two nodes without any edges are perfectly similar.
317        return Closed01::one();
318    }
319
320    if min_deg == 0 {
321        // A node without any edges is not similar at all to a node with edges.
322        return Closed01::zero();
323    }
324
325    assert!(min_deg > 0 && max_deg > 0);
326
327    // map indicies from 0..min(degree) to the node indices
328    let mapidx = |(a, b)| (n_i.nth_edge(a).unwrap(), n_j.nth_edge(b).unwrap());
329
330    let mut w = WeightMatrix::from_fn(min_deg, |ab| similarity_cost(x[mapidx(ab)]));
331
332    let assignment = solve_assignment(&mut w).unwrap();
333    assert!(assignment.len() == min_deg);
334
335    let sum: f32 = assignment
336        .iter()
337        .fold(0.0, |acc, &Position { row, column }| {
338            acc + x[mapidx((row, column))]
339        });
340
341    Closed01::new(sum / max_deg as f32)
342}
343
344// NOTE: Our weight matrix minimizes the cost, while our similarity matrix
345// wants to maximize the similarity score. That's why we have to convert
346// the cost with 1.0 - x.
347fn similarity_cost(weight: f32) -> f32 {
348    debug_assert!(weight >= 0.0 && weight <= 1.0);
349    1.0 - weight
350}