#define __STDC_WANT_LIB_EXT1__ 1
#include <string.h>
#define LG_FREE_ALL ;
#include "LG_internal.h"
static double timings [16] ;
#if LG_SUITESPARSE_GRAPHBLAS_V10
static inline GrB_Info fastsv
(
GrB_Matrix A, GrB_Vector parent2, GrB_Vector mngp, GrB_Vector *gp, GrB_Vector *gp_new, GrB_Vector t, GrB_BinaryOp eq, GrB_BinaryOp min, GrB_Semiring min_2nd, GrB_Matrix Parent, GxB_Container Parent_Container, char *msg
)
{
GrB_Index n = Parent_Container->nrows ;
GrB_Vector parent = Parent_Container->i ;
bool done = false ;
while (true)
{
GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
GRB_TRY (GrB_eWiseAdd (parent2, NULL, NULL, min, mngp, *gp, NULL)) ;
GRB_TRY (GxB_load_Matrix_from_Container (Parent, Parent_Container,
NULL)) ;
GRB_TRY (GrB_mxv (parent2, NULL, min, min_2nd, Parent, mngp, NULL)) ;
GRB_TRY (GxB_unload_Matrix_into_Container (Parent, Parent_Container,
NULL)) ;
GRB_TRY (GrB_assign (parent, NULL, min, parent2, GrB_ALL, n, NULL)) ;
GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, parent, NULL)) ;
GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
if (done) break ;
GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
}
return (GrB_SUCCESS) ;
}
#undef LG_FREE_WORK
#define LG_FREE_WORK \
{ \
LAGraph_Free ((void **) &Tp, NULL) ; \
LAGraph_Free ((void **) &Tj, NULL) ; \
LAGraph_Free ((void **) &Tx, NULL) ; \
LAGraph_Free ((void **) &ht_key, NULL) ; \
LAGraph_Free ((void **) &ht_count, NULL) ; \
LAGraph_Free ((void **) &count, NULL) ; \
LAGraph_Free ((void **) &range, NULL) ; \
LAGraph_Free ((void **) &Px, NULL) ; \
GrB_free (&T) ; \
GrB_free (&t) ; \
GrB_free (&gp) ; \
GrB_free (&mngp) ; \
GrB_free (&gp_new) ; \
GrB_free (&parent2) ; \
GrB_free (&Parent) ; \
GrB_free (&Parent_Container) ; \
GrB_free (&A_Container) ; \
GrB_free (&T_Container) ; \
}
#undef LG_FREE_ALL
#define LG_FREE_ALL \
{ \
LG_FREE_WORK ; \
}
#endif
#define AP(k) (Ap32 ? Ap32 [k] : Ap64 [k])
#define AJ(k) (Aj32 ? Aj32 [k] : Aj64 [k])
#define PARENT(i) (Px32 ? Px32 [i] : Px64 [i])
#define TP(k) (Tp32 ? Tp32 [k] : Tp64 [k])
#define TJ(k) (Tj32 ? Tj32 [k] : Tj64 [k])
#define SET_TP(k,p) { if (Tp32) { Tp32 [k] = p ; } else { Tp64 [k] = p ; }}
#define SET_TJ(k,i) { if (Tj32) { Tj32 [k] = i ; } else { Tj64 [k] = i ; }}
#ifdef TIMINGS
static void print_timings (double timings [16])
{
double total = timings [0] + timings [1] + timings [2] ;
printf ("SV7 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
printf ("SV7 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
printf ("SV7 %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
printf ("SV7 %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
printf ("SV7 %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
printf ("SV7 %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
printf ("SV7 %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
printf ("SV7 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
}
#endif
int LG_CC_FastSV7 (
GrB_Vector *component, LAGraph_Graph G, char *msg
)
{
#if LG_SUITESPARSE_GRAPHBLAS_V10
LG_CLEAR_MSG ;
#ifdef TIMINGS
double timings [16] ;
for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
double tic = LAGraph_WallClockTime ( ) ;
LG_SET_BURBLE (false) ;
#endif
int64_t *range = NULL ;
void *Px = NULL ;
uint64_t Px_size = 0 ;
GrB_Index n, nvals, *ht_key = NULL, *count = NULL ;
void *Tp = NULL, *Tj = NULL ;
GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
parent2 = NULL ;
GrB_Matrix T = NULL, Parent = NULL ;
void *Tx = NULL ;
int *ht_count = NULL ;
GxB_Container Parent_Container = NULL,
A_Container = NULL, T_Container = NULL ;
LG_TRY (LAGraph_CheckGraph (G, msg)) ;
LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
(*component) = NULL ;
LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
(G->kind == LAGraph_ADJACENCY_DIRECTED &&
G->is_symmetric_structure == LAGraph_TRUE)),
LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
"G->A must be known to be symmetric") ;
GrB_Matrix A = G->A ;
GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
GrB_Type Uint, Int ;
GrB_IndexUnaryOp ramp ;
GrB_Semiring min_2nd, min_2ndi ;
GrB_BinaryOp min, eq, imin ;
#ifdef COVERAGE
#define NBIG 100
#else
#define NBIG INT32_MAX
#endif
if (n > NBIG)
{
Uint = GrB_UINT64 ;
Int = GrB_INT64 ;
ramp = GrB_ROWINDEX_INT64 ;
min = GrB_MIN_INT64 ;
imin = GrB_MIN_INT64 ;
eq = GrB_EQ_INT64 ;
min_2nd = GrB_MIN_SECOND_SEMIRING_INT64 ;
min_2ndi = GxB_MIN_SECONDI_INT64 ;
}
else
{
Uint = GrB_UINT32 ;
Int = GrB_INT32 ;
ramp = GrB_ROWINDEX_INT32 ;
min = GrB_MIN_INT32 ;
imin = GrB_MIN_INT32 ;
eq = GrB_EQ_INT32 ;
min_2nd = GrB_MIN_SECOND_SEMIRING_INT32 ;
min_2ndi = GxB_MIN_SECONDI_INT32 ;
}
#define FASTSV_SAMPLES 4
bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
int nthreads, nthreads_outer, nthreads_inner ;
LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
nthreads = nthreads_outer * nthreads_inner ;
nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
nthreads = LAGRAPH_MAX (nthreads, 1) ;
GRB_TRY (GrB_Matrix_new (&Parent, GrB_BOOL, n, n)) ;
GRB_TRY (GxB_Container_new (&Parent_Container)) ;
GRB_TRY (GrB_Vector_free (&(Parent_Container->p))) ;
GRB_TRY (GrB_Vector_new (&(Parent_Container->p), Uint, n+1)) ;
GRB_TRY (GrB_assign (Parent_Container->p, NULL, NULL, 0, GrB_ALL, n+1,
NULL)) ;
GRB_TRY (GrB_apply (Parent_Container->p, NULL, NULL, ramp,
Parent_Container->p, 0, NULL)) ;
GRB_TRY (GrB_Vector_free (&(Parent_Container->x))) ;
GRB_TRY (GrB_Vector_new (&(Parent_Container->x), GrB_BOOL, 1)) ;
GRB_TRY (GrB_assign (Parent_Container->x, NULL, NULL, 0, GrB_ALL, 1,
NULL)) ;
Parent_Container->nrows = n ;
Parent_Container->ncols = n ;
Parent_Container->nvals = n ;
Parent_Container->nrows_nonempty = -1 ;
Parent_Container->ncols_nonempty = n ;
Parent_Container->iso = true ;
Parent_Container->jumbled = false ;
Parent_Container->format = GxB_SPARSE ;
Parent_Container->orientation = GrB_COLMAJOR ;
Parent_Container->Y = NULL ;
GRB_TRY (GrB_Vector_free (&(Parent_Container->i))) ;
GRB_TRY (GrB_Vector_new (&(Parent_Container->i), Int, n)) ;
parent = Parent_Container->i ;
GRB_TRY (GxB_Container_new (&A_Container)) ;
GRB_TRY (GxB_Container_new (&T_Container)) ;
GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
GRB_TRY (GrB_assign (parent, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
GRB_TRY (GrB_apply (parent, NULL, NULL, ramp, parent, 0, NULL)) ;
GRB_TRY (GrB_mxv (parent, NULL, imin, min_2ndi, A, t, NULL)) ;
GRB_TRY (GrB_free (&t)) ;
GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
GRB_TRY (GrB_Vector_new (&gp_new, Int, n)) ;
GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
GRB_TRY (GrB_Vector_new (&parent2, Int, n)) ;
#ifdef TIMINGS
double toc = LAGraph_WallClockTime ( ) ;
timings [0] = toc - tic ; tic = toc ;
#endif
if (sampling)
{
#ifdef TIMINGS
double tic2 = LAGraph_WallClockTime ( ) ;
#endif
void *Ap = NULL, *Aj = NULL ;
uint64_t Ap_size, Aj_size, Ap_len, Aj_len ;
bool A_jumbled, A_iso ;
int Ap_handling, Aj_handling ;
GRB_TRY (LG_SET_FORMAT_HINT (A, LG_SPARSE)) ;
GRB_TRY (GrB_set (A, GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT)) ;
GRB_TRY (GxB_unload_Matrix_into_Container (A, A_Container, NULL)) ;
A_jumbled = A_Container->jumbled ;
A_iso = A_Container->iso ;
GrB_Type Ap_type, Aj_type ;
GRB_TRY (GxB_Vector_unload (A_Container->p, &Ap, &Ap_type, &Ap_len,
&Ap_size, &Ap_handling, NULL)) ;
GRB_TRY (GxB_Vector_unload (A_Container->i, &Aj, &Aj_type, &Aj_len,
&Aj_size, &Aj_handling, NULL)) ;
bool Ap_is_32 = (Ap_type == GrB_UINT32 || Ap_type == GrB_INT32) ;
bool Aj_is_32 = (Aj_type == GrB_UINT32 || Aj_type == GrB_INT32) ;
const uint32_t *Ap32 = Ap_is_32 ? Ap : NULL ;
const uint64_t *Ap64 = Ap_is_32 ? NULL : Ap ;
const uint32_t *Aj32 = Aj_is_32 ? Aj : NULL ;
const uint64_t *Aj64 = Aj_is_32 ? NULL : Aj ;
bool Tp_is_32 = (nvals < UINT32_MAX) ;
bool Tj_is_32 = (n < INT32_MAX) ;
GrB_Type Tp_type = Tp_is_32 ? GrB_UINT32 : GrB_UINT64 ;
GrB_Type Tj_type = Tj_is_32 ? GrB_UINT32 : GrB_UINT64 ;
size_t tpsize = Tp_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
size_t tjsize = Tj_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
GrB_Index Tp_size = (n+1) * tpsize ;
GrB_Index Tj_size = nvals * tjsize ;
GrB_Index Tx_size = sizeof (bool) ;
LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, tpsize, msg)) ;
LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, tjsize, msg)) ;
LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
LG_TRY (LAGraph_Malloc ((void **) &range, nthreads+1, sizeof (int64_t),
msg)) ;
LG_TRY (LAGraph_Calloc ((void **) &count, nthreads+1, sizeof (uint64_t),
msg)) ;
uint32_t *Tp32 = Tp_is_32 ? Tp : NULL ;
uint64_t *Tp64 = Tp_is_32 ? NULL : Tp ;
uint32_t *Tj32 = Tj_is_32 ? Tj : NULL ;
uint64_t *Tj64 = Tj_is_32 ? NULL : Tj ;
int tid;
for (tid = 0 ; tid <= nthreads ; tid++)
{
range [tid] = (n * tid + nthreads - 1) / nthreads ;
}
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
{
int64_t deg = AP (i + 1) - AP (i) ;
count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
}
}
for (tid = 0 ; tid < nthreads ; tid++)
{
count [tid + 1] += count [tid] ;
}
#ifdef TIMINGS
double toc2 = LAGraph_WallClockTime ( ) ;
timings [3] = toc2 - tic2 ; tic2 = toc2 ;
#endif
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
GrB_Index p = count [tid] ;
int64_t ktid = range [tid] ;
SET_TP (ktid, p) ; for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
{
for (int64_t j = 0 ;
j < FASTSV_SAMPLES && AP (i) + j < AP (i + 1) ; j++)
{
uint64_t pi = AP (i) + j ;
uint64_t j = AJ (pi) ;
SET_TJ (p, j) ; p++ ;
}
SET_TP (i+1, p) ; }
}
GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
uint64_t T_nvals = TP (n) ;
uint64_t Tp_len = n+1 ;
uint64_t Tj_len = T_nvals ;
T_Container->nrows = n ;
T_Container->ncols = n ;
T_Container->nrows_nonempty = -1 ;
T_Container->ncols_nonempty = -1 ;
T_Container->nvals = T_nvals ;
T_Container->format = GxB_SPARSE ;
T_Container->orientation = GrB_ROWMAJOR ;
T_Container->iso = true ;
T_Container->jumbled = A_jumbled ;
GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
Tp_size, GrB_DEFAULT, NULL)) ;
GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
Tj_size, GrB_DEFAULT, NULL)) ;
GRB_TRY (GxB_Vector_load (T_Container->x, &Tx, GrB_BOOL, 1,
Tx_size, GrB_DEFAULT, NULL)) ;
GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
#ifdef TIMINGS
toc2 = LAGraph_WallClockTime ( ) ;
timings [4] = toc2 - tic2 ; tic2 = toc2 ;
#endif
LG_TRY (fastsv (T, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
Parent, Parent_Container, msg)) ;
#ifdef TIMINGS
toc2 = LAGraph_WallClockTime ( ) ;
timings [5] = toc2 - tic2 ; tic2 = toc2 ;
#endif
int handling = 0 ;
GrB_Type type = NULL ;
GRB_TRY (GxB_Vector_unload (parent, &Px, &type, &n, &Px_size,
&handling, NULL)) ;
bool Px_is_32 = (type == GrB_UINT32 || type == GrB_INT32) ;
uint32_t *Px32 = Px_is_32 ? Px : NULL ;
uint64_t *Px64 = Px_is_32 ? NULL : Px ;
#define HASH_SIZE 1024
#define HASH_SAMPLES 864
#define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
#define NEXT(x) ((x + 23) & (HASH_SIZE-1))
LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
sizeof (GrB_Index), msg)) ;
LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
sizeof (int), msg)) ;
for (int k = 0 ; k < HASH_SIZE ; k++)
{
ht_key [k] = UINT64_MAX ;
}
uint64_t seed = n ; int64_t key = -1 ; int max_count = 0 ; for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
{
uint64_t i = LG_Random64 (&seed) % n ;
GrB_Index x = PARENT (i) ;
GrB_Index h = HASH (x) ;
while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
ht_key [h] = x ;
ht_count [h]++ ;
if (ht_count [h] > max_count)
{
key = ht_key [h] ;
max_count = ht_count [h] ;
}
}
#ifdef TIMINGS
toc2 = LAGraph_WallClockTime ( ) ;
timings [6] = toc2 - tic2 ; tic2 = toc2 ;
#endif
GRB_TRY (GxB_unload_Matrix_into_Container (T, T_Container, NULL)) ;
int ignore ;
GRB_TRY (GxB_Vector_unload (T_Container->p, &Tp, &Tp_type, &Tp_len,
&Tp_size, &ignore, NULL)) ;
GRB_TRY (GxB_Vector_unload (T_Container->i, &Tj, &Tj_type, &Tj_len,
&Tj_size, &ignore, NULL)) ;
Tp_is_32 = (Tp_type == GrB_UINT32 || Tp_type == GrB_INT32) ;
Tj_is_32 = (Tj_type == GrB_UINT32 || Tj_type == GrB_INT32) ;
Tp32 = Tp_is_32 ? Tp : NULL ;
Tp64 = Tp_is_32 ? NULL : Tp ;
Tj32 = Tj_is_32 ? Tj : NULL ;
Tj64 = Tj_is_32 ? NULL : Tj ;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
uint64_t ktid = range [tid] ;
GrB_Index p = AP (ktid) ;
for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
{
int64_t pi = PARENT (i) ;
int64_t pstart = p ;
SET_TP (i, p) ; if (pi != key)
{
for (GrB_Index pS = AP (i) ; pS < AP (i+1) ; pS++)
{
int64_t j = AJ (pS) ;
if (PARENT (j) != key)
{
SET_TJ (p, j) p++ ;
}
}
if (p - pstart < AP (i+1) - AP (i))
{
SET_TJ (p, key) ; p++ ;
}
}
}
count [tid] = p - TP (ktid) ;
}
nvals = 0 ;
for (tid = 0 ; tid < nthreads ; tid++)
{
int64_t ktid = range [tid] ;
memmove (Tj32 ? ((void *) (Tj32 + nvals)) : ((void *) (Tj64 + nvals)),
Tj32 ? ((void *) (Tj32 + TP (ktid))) : ((void *) (Tj64 + TP (ktid))),
tjsize * count [tid]) ;
#if 0#endif
nvals += count [tid] ;
count [tid] = nvals - count [tid] ;
}
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (tid = 0 ; tid < nthreads ; tid++)
{
int64_t ktid = range [tid] ;
GrB_Index p = TP (ktid) ;
for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
{
int64_t tp = TP (i) ;
tp -= p - count [tid] ;
SET_TP (i, tp) ; }
}
SET_TP (n, nvals) ; Tj_len = nvals ;
GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
Tp_size, GrB_DEFAULT, NULL)) ;
GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
Tj_size, GrB_DEFAULT, NULL)) ;
T_Container->nrows_nonempty = -1 ;
T_Container->ncols_nonempty = -1 ;
T_Container->jumbled = true ;
T_Container->nvals = nvals ;
GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
GRB_TRY (GxB_Vector_load (A_Container->p, &Ap, Ap_type, Ap_len,
Ap_size, Ap_handling, NULL)) ;
GRB_TRY (GxB_Vector_load (A_Container->i, &Aj, Aj_type, Aj_len,
Aj_size, Aj_handling, NULL)) ;
GRB_TRY (GxB_load_Matrix_from_Container (A, A_Container, NULL)) ;
GRB_TRY (GxB_Vector_load (parent, &Px, type, n, Px_size,
GrB_DEFAULT, NULL)) ;
A = T ;
#ifdef TIMINGS
toc2 = LAGraph_WallClockTime ( ) ;
timings [7] = toc2 - tic2 ; tic2 = toc2 ;
#endif
}
#ifdef TIMINGS
toc = LAGraph_WallClockTime ( ) ;
timings [1] = toc - tic ; tic = toc ;
#endif
if (nvals == 0)
{
(*component) = parent ;
Parent_Container->i = NULL ; LG_FREE_WORK ;
#ifdef TIMINGS
print_timings (timings) ;
LG_SET_BURBLE (false) ;
#endif
return (GrB_SUCCESS) ;
}
LG_TRY (fastsv (A, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
Parent, Parent_Container, msg)) ;
(*component) = parent ;
Parent_Container->i = NULL ; LG_FREE_WORK ;
#ifdef TIMINGS
toc = LAGraph_WallClockTime ( ) ;
timings [2] = toc - tic ; print_timings (timings) ;
LG_SET_BURBLE (false) ;
#endif
return (GrB_SUCCESS) ;
#else
LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
#endif
}