lib_dachshund/dachshund/algorithms/
clustering.rs

1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7extern crate nalgebra as na;
8use crate::dachshund::graph_base::GraphBase;
9use crate::dachshund::id_types::NodeId;
10use crate::dachshund::node::{NodeBase, NodeEdgeBase};
11use fxhash::FxHashSet;
12use rand::prelude::*;
13use rand::Rng;
14
15pub trait Clustering: GraphBase
16where
17    Self::NodeType: NodeBase<NodeIdType = NodeId, NodeSetType = FxHashSet<NodeId>>,
18    <Self::NodeType as NodeBase>::NodeEdgeType: NodeEdgeBase<NodeIdType = NodeId>,
19{
20    fn get_clustering_coefficient(&self, id: NodeId) -> Option<f64> {
21        let node = self.get_node(id);
22        let mut neighbor_ids: FxHashSet<NodeId> = FxHashSet::default();
23        for ne in node.get_edges() {
24            neighbor_ids.insert(ne.get_neighbor_id());
25        }
26        let num_neighbors: usize = neighbor_ids.len();
27        if num_neighbors <= 1 {
28            return None;
29        }
30        let mut num_ties: usize = 0;
31        for ne in node.get_edges() {
32            let neighbor = &self.get_node(ne.get_neighbor_id());
33            num_ties += neighbor.count_ties_with_ids(&neighbor_ids);
34        }
35        // different from degree -- this is the number of distinct neighbors,
36        // not the number of edges -- a neighbor may be connected by multiple
37        // edges.
38        Some(num_ties as f64 / ((num_neighbors * (num_neighbors - 1)) as f64))
39    }
40    fn get_avg_clustering(&self) -> f64 {
41        let coefs = self
42            .get_ids_iter()
43            .filter_map(|x| self.get_clustering_coefficient(*x))
44            .collect::<Vec<f64>>();
45        Iterator::sum::<f64>(coefs.iter()) / coefs.len() as f64
46    }
47    // Approximate Clustering - Randomly sample neighbors of nodes w/ degree at least 2.
48    // k~=26,000 gives an approximation w/ <1% chance of an error of more than 1 percentage point.
49    // See http://jgaa.info/accepted/2005/SchankWagner2005.9.2.pdf for approximation guarantees.
50    fn get_approx_avg_clustering(&self, samples: usize) -> f64 {
51        let ordered_nodes = self
52            .get_nodes_iter()
53            .filter(|node| node.degree() >= 2)
54            .collect::<Vec<_>>();
55
56        let n = ordered_nodes.len();
57        let mut successes = 0;
58        let mut rng = rand::thread_rng();
59
60        for _i in 0..samples {
61            // Pick a random node with degree at least 2.
62            let v = &ordered_nodes[rng.gen_range(0..n)];
63
64            // Choose 2 random nodes that are neighbors of j
65            let mut random_neighbors = v.get_edges().choose_multiple(&mut rng, 2).into_iter();
66            let next_random_neighbor = random_neighbors.next();
67            let u_id = next_random_neighbor.unwrap().get_neighbor_id();
68            let w_id = random_neighbors.next().unwrap().get_neighbor_id();
69
70            // If they're connected, increment l.
71            // TODO: No O(1) way to check if there's an edge?
72            for edge in self.get_node(u_id).get_edges() {
73                if edge.get_neighbor_id() == w_id {
74                    successes += 1;
75                    break;
76                }
77            }
78        }
79        (successes as f64) / (samples as f64)
80    }
81}