1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
//------------------------------------------------------------------------------
// LAGr_Modularity: computes modularity of a graph clustering
//------------------------------------------------------------------------------
// LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
// SPDX-License-Identifier: BSD-2-Clause
//
// For additional details (including references to third party source code and
// other files) see the LICENSE file or contact permission@sei.cmu.edu. See
// Contributors.txt for a full list of contributors. Created, in part, with
// funding and support from the U.S. Government (see Acknowledgments.txt file).
// DM22-0790
// Contributed by Cameron Quilici, Texas A&M University
//------------------------------------------------------------------------------
// TODO: ready to consider for src
// The modularity (Q) of a graph clustering C is defined as (directed case):
//
// Q = \sum{c = 1}^n [\frac{L_c}{m} - \gamma(\frac{k_c^{in} k_c^{out}}{m})^2]
//
// where the sum iterates over all communities c = 1,...,n, L_c is the the
// number of total edges in cluster c, m is the total number of edges in the
// graph, k_c^{in} and k_c^{out} are the total in and out degrees of cluster c,
// and \gamma is the resolution parameter which controls the relative importance
// of the intra-cluster and inter-cluster edges.
// Modularity works by comparing the intra-cluster density of a particular
// clustering to that of a random graph with the same degree distribution. The
// range of Q is [-.5, 1]. When Q ~ 0, this indicates that the clustering is
// not much better than at random whereas Q ~ 1 indicates a strong community
// structure.
// https://arxiv.org/abs/0906.0612 pp. 15-16
#define LG_FREE_WORK \
{ \
GrB_free(&l); \
GrB_free(&vmask); \
GrB_free(&k_in); \
GrB_free(&k_out); \
GrB_free(&in_degree); \
GrB_free(&out_degree); \
GrB_free(&A); \
GrB_free(&CA); \
GrB_free(&C); \
GrB_free(&ONE_INT64); \
LAGraph_Free((void **)&lX, NULL); \
LAGraph_Free((void **)&k_outX, NULL); \
LAGraph_Free((void **)&k_inX, NULL); \
}
#define LG_FREE_ALL \
{ \
LG_FREE_WORK; \
}
#include "LG_internal.h"
#include <LAGraphX.h>
int LAGr_Modularity(
// Outputs
double *mod_handle, // output modularity
// Inputs
double resolution, // resolution parameter
GrB_Vector c, // input cluster vector
LAGraph_Graph G, // original graph from which clustering was obtained
char *msg)
{
#if LAGRAPH_SUITESPARSE
GrB_Vector l = NULL;
GrB_Vector vmask = NULL;
GrB_Vector k_in = NULL, k_out = NULL;
GrB_Vector out_degree = NULL, in_degree = NULL;
GrB_Matrix C = NULL, CA = NULL, A = NULL;
GrB_Scalar ONE_INT64 = NULL;
GrB_Index *lX = NULL, *k_outX = NULL, *k_inX = NULL;
//--------------------------------------------------------------------------
// check inputs
//--------------------------------------------------------------------------
LG_CLEAR_MSG;
LG_ASSERT_MSG(mod_handle != NULL, GrB_NULL_POINTER, "mod_handle is NULL");
LG_ASSERT_MSG(resolution >= 0, GrB_INVALID_VALUE,
"resolution parameter must be non-negative");
LG_TRY(LAGraph_CheckGraph(G, msg));
GrB_Index n, nedges;
GRB_TRY(GrB_Matrix_nrows(&n, G->A));
// Do not consider edge weights for modularity
// FUTURE: There is a way to consider edge weights in modularity calculations,
// so could give users the option to pass in a 'weights' parameter specifying
// that edge weights should be used in the calculations.
GRB_TRY(GrB_Matrix_new(&A, GrB_INT64, n, n));
GRB_TRY(GrB_apply(A, NULL, NULL, GxB_ONE_INT64, G->A, NULL));
// remove self-edges, not relevant to clustering metrics
GRB_TRY(GrB_select(A, NULL, NULL, GrB_OFFDIAG, A, 0, NULL));
GRB_TRY(GrB_Matrix_nvals(&nedges, A));
//--------------------------------------------------------------------------
// initializations
//--------------------------------------------------------------------------
GRB_TRY(GrB_Matrix_new(&C, GrB_INT64, n, n));
GRB_TRY(GrB_Matrix_new(&CA, GrB_INT64, n, n));
GRB_TRY(GrB_Vector_new(&l, GrB_INT64, n));
GRB_TRY(GrB_Vector_new(&vmask, GrB_INT64, n));
GRB_TRY(GrB_Vector_new(&k_in, GrB_INT64, n));
GRB_TRY(GrB_Vector_new(&k_out, GrB_INT64, n));
GRB_TRY(GrB_Vector_new(&out_degree, GrB_INT64, n));
GRB_TRY(GrB_Vector_new(&in_degree, GrB_INT64, n));
GRB_TRY(GrB_Scalar_new(&ONE_INT64, GrB_INT64));
GRB_TRY(GrB_Scalar_setElement_INT64(ONE_INT64, (int64_t)1));
// Convert the cluster vector to a uint64_t matrix C where
// C(i, j) = 1 if and only if vertex j is in cluster i
GrB_Index *cI, *cX;
LAGRAPH_TRY(LAGraph_Malloc((void **)&cI, n, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&cX, n, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Vector_extractTuples_INT64(cI, (int64_t *) cX, &n, c));
GRB_TRY(GxB_Matrix_build_Scalar(C, cX, cI, ONE_INT64, n));
GrB_Matrix_wait(C, GrB_MATERIALIZE);
LAGraph_Free((void **)&cI, NULL);
LAGraph_Free((void **)&cX, NULL);
// Calculate actual number of intra-cluster edges
GRB_TRY(GrB_mxm(CA, NULL, NULL, LAGraph_plus_one_int64, C, A, NULL));
GRB_TRY(GrB_mxm(CA, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, CA, C,
GrB_DESC_T1));
GRB_TRY(GxB_Vector_diag(l, CA, 0, NULL));
// Calculate the combined degree for each cluster
GRB_TRY(GrB_reduce(out_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A, NULL));
GRB_TRY(GrB_reduce(in_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A,
GrB_DESC_T0));
GRB_TRY(GrB_mxv(k_out, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
out_degree, NULL));
GRB_TRY(GrB_mxv(k_in, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
in_degree, NULL));
// vmask (i) = 0 if cluster i is non-empty (has any vertices)
GRB_TRY(GrB_reduce(vmask, NULL, NULL, GrB_LOR_MONOID_BOOL, C, NULL));
GRB_TRY(GrB_apply(vmask, vmask, NULL, GxB_LNOT_BOOL, vmask, NULL));
// If any of the above vectors have fewer entries than nclusters, this means
// that there are singleton clusters with one vertex/no out-degree/no
// in-degree. So we need to add explicit zeros wherever values are missing
// for further calculations.
GrB_Index nclusters, nl, nk_out, nk_in;
GRB_TRY(GrB_Vector_nvals(&nclusters, vmask));
GRB_TRY(GrB_Vector_nvals(&nl, l));
GRB_TRY(GrB_Vector_nvals(&nk_out, l));
GRB_TRY(GrB_Vector_nvals(&nk_in, l));
if (nclusters != nl)
{
GRB_TRY(GrB_assign(l, l, NULL, vmask, GrB_ALL, nclusters, GrB_DESC_SC));
}
if (nclusters != nk_out)
{
GRB_TRY(GrB_assign(k_out, k_out, NULL, vmask, GrB_ALL, nclusters,
GrB_DESC_SC));
}
if (nclusters != nk_in)
{
GRB_TRY(GrB_assign(k_in, k_in, NULL, vmask, GrB_ALL, nclusters,
GrB_DESC_SC));
}
// Extract actual values of l, k_out, and k_in for modularity calculations
LAGRAPH_TRY(
LAGraph_Malloc((void **)&lX, nclusters, sizeof(GrB_Index), msg));
LAGRAPH_TRY(
LAGraph_Malloc((void **)&k_outX, nclusters, sizeof(GrB_Index), msg));
LAGRAPH_TRY(
LAGraph_Malloc((void **)&k_inX, nclusters, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) lX, &nclusters, l));
GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_outX, &nclusters, k_out));
GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_inX, &nclusters, k_in));
GrB_Index m, out_degree_sum, in_degree_sum, L_c;
GRB_TRY(GrB_reduce(&out_degree_sum, NULL, GrB_PLUS_MONOID_INT64, out_degree,
NULL));
m = out_degree_sum;
double norm = 1.0 / (m * m);
// compute modularity
double mod = 0.0;
for (int c = 0; c < nclusters; c++)
{
mod += (1.0 * lX[c] / nedges) -
(resolution * ((k_outX[c] * k_inX[c]) * norm));
}
(*mod_handle) = mod;
LG_FREE_WORK;
return (GrB_SUCCESS);
#else
return (GrB_NOT_IMPLEMENTED);
#endif
}