// LG_CC_FastSV5: workspace macros and hash-table constants.  LG_FREE_ALL is a
// no-op here; it is redefined later in this file, before LG_CC_FastSV5.
#define LG_FREE_ALL ;
#include "LG_internal.h"
#if LAGRAPH_SUITESPARSE
// hash table size; must be a power of 2 so masking with (HASH_SIZE-1) works
#define HASH_SIZE 1024
// number of random samples drawn when populating the hash table
#define HASH_SAMPLES 864
// HASH(x) = (17 * x) mod HASH_SIZE, since (x << 4) + x == 17*x
#define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
// linear probing with stride 23 (odd, so every slot is eventually visited)
#define NEXT(x) ((x + 23) & (HASH_SIZE-1))
//------------------------------------------------------------------------------
// ht_init: clear the hash table
//------------------------------------------------------------------------------

// Marks every slot empty (key of -1) and resets every count to zero.

static inline void ht_init
(
    int32_t *ht_key,
    int32_t *ht_val
)
{
    for (int32_t h = 0 ; h < HASH_SIZE ; h++)
    {
        ht_key [h] = -1 ;   // -1 denotes an unoccupied slot
        ht_val [h] = 0 ;
    }
}
//------------------------------------------------------------------------------
// ht_sample: tally randomly sampled entries of V32 in the hash table
//------------------------------------------------------------------------------

// Draws `samples` entries of V32 [0..n-1] at random (using LG_Random64 with
// the given seed) and counts each distinct value in the hash table via
// linear probing.

static inline void ht_sample
(
    uint32_t *V32, int32_t n,
    int32_t samples, int32_t *ht_key,
    int32_t *ht_val,
    uint64_t *seed
)
{
    for (int32_t s = 0 ; s < samples ; s++)
    {
        // pick one entry of V32 at random
        int32_t key = V32 [LG_Random64 (seed) % n] ;
        // probe until the slot holding key, or the first empty slot, is found
        int32_t slot = HASH (key) ;
        while (ht_key [slot] != -1 && ht_key [slot] != key)
        {
            slot = NEXT (slot) ;
        }
        // record the key and bump its occurrence count
        ht_key [slot] = key ;
        ht_val [slot]++ ;
    }
}
//------------------------------------------------------------------------------
// ht_most_frequent: key with the highest count in the hash table
//------------------------------------------------------------------------------

// Returns the key whose sampled count is largest, or -1 if the table is
// empty.  Ties keep the first maximum encountered in slot order.

static inline int32_t ht_most_frequent
(
    int32_t *ht_key,
    int32_t *ht_val
)
{
    int32_t best_key = -1 ;
    int32_t best_count = 0 ;
    for (int32_t h = 0 ; h < HASH_SIZE ; h++)
    {
        int32_t count = ht_val [h] ;
        if (count > best_count)
        {
            best_key = ht_key [h] ;
            best_count = count ;
        }
    }
    return (best_key) ;
}
//------------------------------------------------------------------------------
// Reduce_assign32: w (index) = min (w (index), s)
//------------------------------------------------------------------------------

// Computes w [index [k]] = min (w [index [k]], s [k]) for all k = 0..n-1,
// where w and s are full GrB_UINT32 vectors of length n.  Both vectors are
// unpacked with GxB_Vector_export_Full, updated in place, and repacked, so
// the callers' handles remain valid on return.
//
// Parallel case: index values that occur frequently (estimated by random
// sampling into ht_key/ht_val) are accumulated in per-thread buffers and
// merged serially afterwards, so the hottest entries of w are not updated
// concurrently by every thread.
//
// FIX: the original had "#else" on the same line as the preceding if
// condition.  A preprocessor directive must start its own line, so the
// COVERAGE build was a syntax error, and the non-COVERAGE build skipped both
// conditions (leaving a dangling "else").  The directive now stands alone.

static inline int Reduce_assign32
(
    GrB_Vector *w_handle,       // full vector of size n
    GrB_Vector *s_handle,       // full vector of size n (may be iso)
    uint32_t *index,            // array of size n
    GrB_Index n,
    int nthreads,
    int32_t *ht_key, int32_t *ht_val, uint64_t *seed, char *msg
)
{
    GrB_Type w_type, s_type ;
    GrB_Index w_n, s_n, w_size, s_size ;
    uint32_t *w_x, *s_x ;
    bool s_iso = false ;

    // unpack the raw values of both vectors
    GRB_TRY (GxB_Vector_export_Full (w_handle, &w_type, &w_n, (void **) &w_x,
        &w_size, NULL, NULL)) ;
    GRB_TRY (GxB_Vector_export_Full (s_handle, &s_type, &s_n, (void **) &s_x,
        &s_size, &s_iso, NULL)) ;

    #if defined ( COVERAGE )
    // test-coverage builds force the parallel path for modest problem sizes
    if (n >= 200)
    #else
    if (nthreads >= 4)
    #endif
    {
        // parallel case
        uint32_t *mem ;
        // NOTE(review): if this malloc fails, w and s remain unpacked since
        // LG_FREE_ALL is empty at this point -- confirm the caller's cleanup.
        LAGRAPH_TRY (LAGraph_Malloc ((void **) &mem, nthreads*HASH_SIZE,
            sizeof (uint32_t), msg)) ;
        // find the most frequent entries of index, by random sampling
        ht_init (ht_key, ht_val) ;
        ht_sample (index, n, HASH_SAMPLES, ht_key, ht_val, seed) ;
        int tid ;
        #pragma omp parallel for num_threads(nthreads) schedule(static)
        for (tid = 0 ; tid < nthreads ; tid++)
        {
            // buf [h] is this thread's private running min for hot entry
            // ht_key [h]; start it at the current value of w
            uint32_t *buf = mem + tid * HASH_SIZE ;
            for (int32_t h = 0 ; h < HASH_SIZE ; h++)
            {
                if (ht_key [h] != -1)
                {
                    buf [h] = w_x [ht_key [h]] ;
                }
            }
            // this thread handles k in [kstart, kend)
            int32_t kstart = (n * tid + nthreads - 1) / nthreads ;
            int32_t kend = (n * tid + n + nthreads - 1) / nthreads ;
            for (int32_t k = kstart ; k < kend ; k++)
            {
                uint32_t i = index [k] ;
                int32_t h = HASH (i) ;
                while (ht_key [h] != -1 && ht_key [h] != i)
                {
                    h = NEXT (h) ;
                }
                if (ht_key [h] == -1)
                {
                    // i is not a hot entry: update w directly.
                    // NOTE(review): two threads can still reach the same
                    // cold i concurrently -- confirm this race is tolerated
                    // by the monotone min-update.
                    w_x [i] = LAGRAPH_MIN (w_x [i], s_x [s_iso?0:k]) ;
                }
                else
                {
                    // hot entry: accumulate into the thread-private buffer
                    buf [h] = LAGRAPH_MIN (buf [h], s_x [s_iso?0:k]) ;
                }
            }
        }
        // serial merge of the per-thread buffers into w
        for (int32_t h = 0 ; h < HASH_SIZE ; h++)
        {
            int32_t i = ht_key [h] ;
            if (i != -1)
            {
                for (tid = 0 ; tid < nthreads ; tid++)
                {
                    w_x [i] = LAGRAPH_MIN (w_x [i], mem [tid * HASH_SIZE + h]) ;
                }
            }
        }
        LAGraph_Free ((void **) &mem, NULL) ;
    }
    else
    {
        // sequential case
        for (int64_t k = 0 ; k < n ; k++)
        {
            uint32_t i = index [k] ;
            w_x [i] = LAGRAPH_MIN (w_x [i], s_x [s_iso?0:k]) ;
        }
    }

    // repack both vectors; w has been modified and is never iso
    GRB_TRY (GxB_Vector_import_Full (w_handle, w_type, w_n, (void **) &w_x,
        w_size, false, NULL)) ;
    GRB_TRY (GxB_Vector_import_Full (s_handle, s_type, s_n, (void **) &s_x,
        s_size, s_iso, NULL)) ;
    return (GrB_SUCCESS) ;
}
#endif
// Redefine LG_FREE_ALL to free all workspace and intermediate vectors of
// LG_CC_FastSV5 below.  It is invoked explicitly on the normal return path;
// presumably the *_TRY macros also invoke it on error -- confirm against
// LG_internal.h.
#undef LG_FREE_ALL
#define LG_FREE_ALL \
{ \
LAGraph_Free ((void **) &I, NULL) ; \
LAGraph_Free ((void **) &V32, NULL) ; \
LAGraph_Free ((void **) &ht_key, NULL) ; \
LAGraph_Free ((void **) &ht_val, NULL) ; \
\
GrB_free (&f) ; \
GrB_free (&gp) ; \
GrB_free (&mngp) ; \
GrB_free (&gp_new) ; \
GrB_free (&mod) ; \
}
//------------------------------------------------------------------------------
// LG_CC_FastSV5: connected components via the FastSV algorithm (32-bit ids)
//------------------------------------------------------------------------------

// Computes the connected components of G->A.  On success, (*component) is a
// GrB_UINT32 vector of length n where component [i] is the representative
// vertex of the component containing i.  G->A must be known to have a
// symmetric structure, and n must be <= UINT32_MAX (ids are held as
// uint32_t).  SuiteSparse-only: relies on GxB export/import of raw CSR and
// full-vector content; returns GrB_NOT_IMPLEMENTED otherwise.
int LG_CC_FastSV5 (
GrB_Vector *component, LAGraph_Graph G, char *msg
)
{
#if LAGRAPH_SUITESPARSE
LG_CLEAR_MSG ;
// workspace, freed by LG_FREE_ALL
uint32_t *V32 = NULL ;
int32_t *ht_key = NULL, *ht_val = NULL ;
GrB_Index n, nnz, *I = NULL ;
GrB_Vector f = NULL, gp_new = NULL, mngp = NULL, mod = NULL, gp = NULL ;
GrB_Matrix T = NULL ;
// check inputs
LG_TRY (LAGraph_CheckGraph (G, msg)) ;
LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
(G->kind == LAGraph_ADJACENCY_DIRECTED &&
G->is_symmetric_structure == LAGraph_TRUE)),
-1001, "G->A must be known to be symmetric") ;
GrB_Matrix S = G->A ;
GRB_TRY (GrB_Matrix_nrows (&n, S)) ;
GRB_TRY (GrB_Matrix_nvals (&nnz, S)) ;
// vertex ids are stored as uint32_t throughout
LG_ASSERT_MSG (n <= UINT32_MAX, -1, "problem too large (fixme)") ;
// use the sampling phase only when the average degree exceeds
// 2 * FASTSV_SAMPLES
#define FASTSV_SAMPLES 4
bool sampling = (n * FASTSV_SAMPLES * 2 < nnz) ;
uint64_t seed = n ;
// nthreads: for the heavy parallel loops; nthreads2: for the light gather
// loops (at most one thread per 64K entries)
int nthreads, nthreads_outer, nthreads_inner ;
LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
nthreads = nthreads_outer * nthreads_inner ;
nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
nthreads = LAGRAPH_MAX (nthreads, 1) ;
int nthreads2 = n / (64*1024) ;
nthreads2 = LAGRAPH_MIN (nthreads2, nthreads) ;
nthreads2 = LAGRAPH_MAX (nthreads2, 1) ;
// initialize f = gp = mngp = 0:n-1 (each vertex is its own parent)
GRB_TRY (GrB_Vector_new (&f, GrB_UINT32, n)) ;
GRB_TRY (GrB_Vector_new (&gp_new, GrB_UINT32, n)) ;
GRB_TRY (GrB_Vector_new (&mod, GrB_BOOL, n)) ;
LAGRAPH_TRY (LAGraph_Malloc ((void **) &I , n, sizeof (GrB_Index), msg)) ;
LAGRAPH_TRY (LAGraph_Malloc ((void **) &V32, n, sizeof (uint32_t), msg)) ;
int64_t i ;
#pragma omp parallel for num_threads(nthreads2) schedule(static)
for (i = 0 ; i < n ; i++)
{
I [i] = i ;
V32 [i] = (uint32_t) i ;
}
GRB_TRY (GrB_Vector_build (f, I, V32, n, GrB_PLUS_UINT32)) ;
GRB_TRY (GrB_Vector_dup (&gp, f)) ;
GRB_TRY (GrB_Vector_dup (&mngp, f)) ;
// hash table shared by Reduce_assign32 and ht_most_frequent
LAGRAPH_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE, sizeof (int32_t),
msg)) ;
LAGRAPH_TRY (LAGraph_Malloc ((void **) &ht_val, HASH_SIZE, sizeof (int32_t),
msg)) ;
if (sampling)
{
//--------------------------------------------------------------------------
// sampling phase: run FastSV on a subsample T of S, then prune S into T
//--------------------------------------------------------------------------
GrB_Type type ;
GrB_Index nrows, ncols, nvals ;
size_t typesize ;
// NOTE(review): nonempty (and t_nonempty / t_nvals below) are never used
int64_t nonempty ;
GrB_Index *Sp, *Sj ;
void *Sx ;
bool S_jumbled = false ;
GrB_Index Sp_size, Sj_size, Sx_size ;
bool S_iso = false ;
GRB_TRY (GrB_Matrix_nvals (&nvals, S)) ;
// export S in CSR form; G->A is NULL until S is imported back below
GRB_TRY (GxB_Matrix_export_CSR (&S, &type, &nrows, &ncols, &Sp, &Sj,
&Sx, &Sp_size, &Sj_size, &Sx_size,
&S_iso, &S_jumbled, NULL)) ;
LAGRAPH_TRY (LAGraph_SizeOfType (&typesize, type, msg)) ;
G->A = NULL ;
// T holds at most FASTSV_SAMPLES entries per row of S; Tj is allocated at
// the full nvals because Tp/Tj are reused for the pruned matrix below
GrB_Index Tp_len = nrows+1, Tp_size = Tp_len*sizeof(GrB_Index);
GrB_Index Tj_len = nvals, Tj_size = Tj_len*sizeof(GrB_Index);
GrB_Index Tx_len = nvals ;
GrB_Index *Tp = NULL, *Tj = NULL ;
GrB_Index Tx_size = typesize ;
void *Tx = NULL ;
int32_t *range = NULL ;
GrB_Index *count = NULL ;
LAGRAPH_TRY (LAGraph_Malloc ((void **) &Tp, Tp_len,
sizeof (GrB_Index), msg)) ;
LAGRAPH_TRY (LAGraph_Malloc ((void **) &Tj, Tj_len,
sizeof (GrB_Index), msg)) ;
// a single zeroed value: T is imported with its iso flag set to true
LAGRAPH_TRY (LAGraph_Calloc (&Tx, 1, typesize, msg)) ;
// rows range [tid] .. range [tid+1]-1 are owned by thread tid
LAGRAPH_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1,
sizeof (int32_t), msg)) ;
// count [tid+1] = # of sampled entries in tid's rows (prefix-summed below)
LAGRAPH_TRY (LAGraph_Malloc ((void **) &count, nthreads + 1,
sizeof (GrB_Index), msg)) ;
memset (count, 0, sizeof (GrB_Index) * (nthreads + 1)) ;
int tid ;
for (tid = 0 ; tid <= nthreads ; tid++)
{
range [tid] = (n * tid + nthreads - 1) / nthreads ;
}
// count how many entries each thread will emit
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
for (int32_t i = range [tid] ; i < range [tid+1] ; i++)
{
int32_t deg = Sp [i + 1] - Sp [i] ;
count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
}
}
// cumulative sum: count [tid] = offset of tid's first entry in Tj
for (tid = 0 ; tid < nthreads ; tid++)
{
count [tid + 1] += count [tid] ;
}
// fill Tp/Tj with the first FASTSV_SAMPLES entries of each row of S
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
GrB_Index p = count [tid] ;
Tp [range [tid]] = p ;
for (int32_t i = range [tid] ; i < range [tid+1] ; i++)
{
for (int32_t j = 0 ;
j < FASTSV_SAMPLES && Sp [i] + j < Sp [i + 1] ; j++)
{
Tj [p++] = Sj [Sp [i] + j] ;
}
Tp [i + 1] = p ;
}
}
GrB_Index Tp_siz = Tp_size ;
GrB_Index Tj_siz = Tj_size ;
GrB_Index Tx_siz = Tx_size ;
// NOTE(review): t_nvals is computed but never used
GrB_Index t_nvals = Tp [nrows] ;
// import the sample matrix T (iso; jumbled iff S was jumbled)
GRB_TRY (GxB_Matrix_import_CSR (&T, type, nrows, ncols,
&Tp, &Tj, &Tx, Tp_siz, Tj_siz, Tx_siz,
true, S_jumbled, NULL)) ;
// FastSV iteration on the sample T, until no parent changes
bool change = true, is_first = true ;
while (change)
{
// mngp [i] = min of gp [j] over the entries j in T's row i
GRB_TRY (GrB_mxv (mngp, NULL, GrB_MIN_UINT32,
GrB_MIN_SECOND_SEMIRING_UINT32, T, gp, NULL)) ;
// hooking: f [V32 [k]] = min (f [V32 [k]], mngp [k]); skipped on the
// first pass, when V32 still holds the identity
if (!is_first)
{
LG_TRY (Reduce_assign32 (&f, &mngp, V32, n, nthreads,
ht_key, ht_val, &seed, msg)) ;
}
GRB_TRY (GrB_eWiseAdd (f, NULL, GrB_MIN_UINT32, GrB_MIN_UINT32,
mngp, gp, NULL)) ;
// shortcutting: gp_new = f [f], via V32 = values of f, I = (GrB_Index)
// V32.  NOTE(review): this int64_t i shadows the one declared above.
GRB_TRY (GrB_Vector_extractTuples (NULL, V32, &n, f)) ; int64_t i ;
#pragma omp parallel for num_threads(nthreads2) schedule(static)
for (i = 0 ; i < n ; i++)
{
I [i] = (GrB_Index) V32 [i] ;
}
GRB_TRY (GrB_extract (gp_new, NULL, NULL, f, I, n, NULL)) ;
// change = any (gp_new != gp)
GRB_TRY (GrB_eWiseMult (mod, NULL, NULL, GrB_NE_UINT32, gp_new,
gp, NULL)) ;
GRB_TRY (GrB_reduce (&change, NULL, GrB_LOR_MONOID_BOOL, mod,
NULL)) ;
// swap gp and gp_new
GrB_Vector t = gp ; gp = gp_new ; gp_new = t ;
is_first = false ;
}
// key = the most frequent component id, found by random sampling of V32
ht_init (ht_key, ht_val) ;
ht_sample (V32, n, HASH_SAMPLES, ht_key, ht_val, &seed) ;
int32_t key = ht_most_frequent (ht_key, ht_val) ;
int64_t t_nonempty = -1 ;
bool T_jumbled = false, T_iso = true ;
// take back T's content so Tp/Tj can be refilled with the pruned S
GRB_TRY (GxB_Matrix_export_CSR (&T, &type, &nrows, &ncols, &Tp, &Tj,
&Tx, &Tp_siz, &Tj_siz, &Tx_siz,
&T_iso, &T_jumbled, NULL)) ;
// prune S into T: rows whose parent is key become empty; edges (i,j)
// with parent(j) == key are dropped, replaced by one edge (i,key)
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
GrB_Index ptr = Sp [range [tid]] ;
for (int32_t i = range [tid] ; i < range [tid+1] ; i++)
{
// pv = current parent of vertex i
int32_t pv = V32 [i] ; Tp [i] = ptr ; if (pv != key)
{
// keep only edges whose endpoint is outside the key component
for (GrB_Index p = Sp [i] ; p < Sp [i+1] ; p++)
{
int32_t j = Sj [p] ;
if (V32 [j] != key)
{
Tj [ptr++] = j ;
}
}
// if any edge was dropped, add a single edge (i, key)
if (ptr - Tp [i] < Sp [i+1] - Sp [i])
{
Tj [ptr++] = key ;
}
}
}
// # of entries this thread kept (written at Sp [range [tid]] onward)
count [tid] = ptr - Tp [range [tid]] ;
}
// compact each thread's segment of Tj to the front; afterwards
// count [tid] holds the compacted start offset for thread tid
GrB_Index offset = 0 ;
for (tid = 0 ; tid < nthreads ; tid++)
{
memmove (Tj + offset, Tj + Tp [range [tid]],
sizeof (GrB_Index) * count [tid]) ;
offset += count [tid] ;
count [tid] = offset - count [tid] ;
}
// shift each thread's row pointers down to match the compacted Tj
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
GrB_Index ptr = Tp [range [tid]] ;
for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
{
Tp [i] -= ptr - count [tid] ;
}
}
Tp [n] = offset ;
LAGraph_Free ((void **) &count, NULL) ;
LAGraph_Free ((void **) &range, NULL) ;
// restore G->A from the untouched Sp/Sj/Sx, and import the pruned
// matrix as T (jumbled: column indices within a row are unsorted)
GRB_TRY (GxB_Matrix_import_CSR (&S, type, nrows, ncols,
&Sp, &Sj, &Sx, Sp_size, Sj_size, Sx_size,
S_iso, S_jumbled, NULL)) ;
GRB_TRY (GxB_Matrix_import_CSR (&T, type, nrows, ncols,
&Tp, &Tj, &Tx, Tp_siz, Tj_siz, Tx_siz,
T_iso, true, NULL)) ;
G->A = S ;
}
else
{
// no sampling: iterate directly on the input matrix
T = S ;
}
// final FastSV iteration on T (the pruned S, or S itself)
GRB_TRY (GrB_Matrix_nvals (&nnz, T)) ;
bool change = true ;
while (change && nnz > 0)
{
// mngp [i] = min of gp [j] over the entries j in T's row i
GRB_TRY (GrB_mxv (mngp, NULL, GrB_MIN_UINT32,
GrB_MIN_SECOND_SEMIRING_UINT32, T, gp, NULL)) ;
// hooking.  NOTE(review): wrapped in GRB_TRY here but LG_TRY in the
// sampling loop above -- confirm both handle the int result alike.
GRB_TRY (Reduce_assign32 (&f, &mngp, V32, n, nthreads, ht_key,
ht_val, &seed, msg)) ;
GRB_TRY (GrB_eWiseAdd (f, NULL, GrB_MIN_UINT32, GrB_MIN_UINT32,
mngp, gp, NULL)) ;
// shortcutting: gp_new = f [f]
GRB_TRY (GrB_Vector_extractTuples (NULL, V32, &n, f)) ; int64_t k ;
#pragma omp parallel for num_threads(nthreads2) schedule(static)
for (k = 0 ; k < n ; k++)
{
I [k] = (GrB_Index) V32 [k] ;
}
GRB_TRY (GrB_extract (gp_new, NULL, NULL, f, I, n, NULL)) ;
// change = any (gp_new != gp)
GRB_TRY (GrB_eWiseMult (mod, NULL, NULL, GrB_NE_UINT32, gp_new, gp,
NULL)) ;
GRB_TRY (GrB_reduce (&change, NULL, GrB_LOR_MONOID_BOOL, mod, NULL)) ;
GrB_Vector t = gp ; gp = gp_new ; gp_new = t ;
}
// hand ownership of f to the caller, then free everything else
(*component) = f ;
f = NULL ;
// T was newly created only in the sampling case (otherwise T == G->A)
if (sampling)
{
GrB_free (&T) ;
}
LG_FREE_ALL ;
return (GrB_SUCCESS) ;
#else
return (GrB_NOT_IMPLEMENTED) ;
#endif
}