#include "LAGraphX.h"
#if defined ( GRAPHBLAS_HAS_CUDA )
#define EXPERIMENTAL_GPU
#include "LAGraph_demo.h"
void GB_Global_hack_set (int k, int64_t hack) ;
#define NTHREAD_LIST 1
#define THREAD_LIST 0
#define LG_FREE_ALL \
{ \
LAGraph_Delete (&G, NULL) ; \
GrB_free (&A) ; \
}
char t [256] ;
char *method_name (int method, int sorting)
{
char *s ;
switch (method)
{
case LAGr_TriangleCount_AutoMethod: s = "default (Sandia_LUT) " ; break ;
case LAGr_TriangleCount_Burkhardt: s = "Burkhardt: sum ((A^2) .* A) / 6" ; break ;
case LAGr_TriangleCount_Cohen: s = "Cohen: sum ((L*U) .* A) / 2" ; break ;
case LAGr_TriangleCount_Sandia_LL: s = "Sandia_LL: sum ((L*L) .* L) " ; break ;
case LAGr_TriangleCount_Sandia_UU: s = "Sandia_UU: sum ((U*U) .* U) " ; break ;
case LAGr_TriangleCount_Sandia_LUT: s = "Sandia_LUT: sum ((L*U') .* L) " ; break ;
case LAGr_TriangleCount_Sandia_ULT: s = "Sandia_ULT: sum ((U*L') .* U) " ; break ;
default: printf("Unrecognized method: %d\n", method);
abort ( ) ;
}
if (sorting == LAGr_TriangleCount_Descending) sprintf (t, "%s sort: descending degree", s) ;
else if (sorting == LAGr_TriangleCount_Ascending) sprintf (t, "%s ascending degree", s) ;
else if (sorting == LAGr_TriangleCount_AutoSort) sprintf (t, "%s auto-sort", s) ;
else sprintf (t, "%s sort: none", s) ;
return (t) ;
}
void print_method (FILE *f, int method, int sorting)
{
fprintf (f, "%s\n", method_name (method, sorting)) ;
}
#endif
int main (int argc, char **argv)
{
#if defined ( GRAPHBLAS_HAS_CUDA )
char msg [LAGRAPH_MSG_LEN] ;
GrB_Matrix A = NULL ;
LAGraph_Graph G = NULL ;
bool burble = false ;
demo_init (burble) ;
int ntrials = 5 ;
printf ("# of trials: %d\n", ntrials) ;
int nt = NTHREAD_LIST ;
int Nthreads [20] = { 0, THREAD_LIST } ;
int nthreads_max, nthreads_outer, nthreads_inner ;
LAGRAPH_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
nthreads_max = nthreads_outer * nthreads_inner ;
if (Nthreads [1] == 0)
{
Nthreads [1] = nthreads_max ;
for (int t = 2 ; t <= nt ; t++)
{
Nthreads [t] = Nthreads [t-1] / 2 ;
if (Nthreads [t] == 0) nt = t-1 ;
}
}
printf ("threads to test: ") ;
for (int t = 1 ; t <= nt ; t++)
{
int nthreads = Nthreads [t] ;
if (nthreads > nthreads_max) continue ;
printf (" %d", nthreads) ;
}
printf ("\n") ;
char *matrix_name = (argc > 1) ? argv [1] : "stdin" ;
LAGRAPH_TRY (readproblem (&G, NULL,
true, true, true, NULL, false, argc, argv)) ;
LAGRAPH_TRY (LAGraph_Graph_Print (G, LAGraph_SHORT, stdout, msg)) ;
LAGRAPH_TRY (LAGraph_Cached_OutDegree (G, msg)) ;
GrB_Index n, nvals ;
GRB_TRY (GrB_Matrix_nrows (&n, G->A)) ;
GRB_TRY (GrB_Matrix_nvals (&nvals, G->A)) ;
LG_SET_BURBLE (true) ;
GrB_Index ntriangles, ntsimple = 0 ;
#if 0#endif
double ttot = LAGraph_WallClockTime ( ) ;
printf ("\nwarmup method: ") ;
int presort = LAGr_TriangleCount_NoSort ; print_method (stdout, LAGr_TriangleCount_Sandia_ULT, presort) ;
GB_Global_hack_set (2, 2) ; LAGr_TriangleCount_Method method = LAGr_TriangleCount_Sandia_ULT ;
LAGRAPH_TRY (LAGr_TriangleCount_GPU (&ntriangles, G, &method, &presort, msg)) ;
printf ("# of triangles: %" PRIu64 "\n", ntriangles) ;
print_method (stdout, 6, presort) ;
ttot = LAGraph_WallClockTime ( ) - ttot ;
printf ("nthreads: %3d time: %12.6f rate: %6.2f (Sandia_ULT, one trial)\n",
nthreads_max, ttot, 1e-6 * nvals / ttot) ;
ttot = LAGraph_WallClockTime ( ) ;
GrB_Index ntriangles_gpu ;
GB_Global_hack_set (2, 1) ;
LAGRAPH_TRY (LAGr_TriangleCount_GPU (&ntriangles_gpu, G, &method, &presort, msg)) ;
LG_SET_BURBLE (false) ;
ttot = LAGraph_WallClockTime ( ) - ttot ;
printf ("# of triangles: %" PRIu64 " (GPU)\n", ntriangles_gpu) ;
print_method (stdout, 6, presort) ;
printf ("nthreads: %3d time: %12.6f rate: %6.2f (Sandia_ULT, one trial)\n",
nthreads_max, ttot, 1e-6 * nvals / ttot) ;
if (ntriangles_gpu != ntriangles)
{
printf ("wrong # triangles: %g %g\n", (double) ntriangles,
(double) ntsimple) ;
fflush (stdout) ;
fflush (stderr) ;
abort ( ) ;
}
GB_Global_hack_set (2, 2) ; presort = LAGr_TriangleCount_AutoSort ;
#if 0#endif
double t_best = INFINITY ;
int method_best = -1 ;
int nthreads_best = -1 ;
int sorting_best = 0 ;
int gpu_best = -1 ;
for (int method = 6 ; method <= 6 ; method++) {
int sorting = LAGr_TriangleCount_NoSort ;
for (int with_gpu = 0 ; with_gpu <= 1 ; with_gpu++)
{
if (with_gpu)
{
GB_Global_hack_set (2, 1) ; }
else
{
GB_Global_hack_set (2, 2) ; }
printf ("\nMethod: GPU: %d ", with_gpu) ;
print_method (stdout, method, sorting) ;
for (int t = 1 ; t <= nt ; t++)
{
int nthreads = Nthreads [t] ;
if (nthreads > nthreads_max) continue ;
LAGRAPH_TRY (LAGraph_SetNumThreads (1, nthreads, msg)) ;
GrB_Index nt2 ;
double ttot = 0, ttrial [100] ;
LAGr_TriangleCount_Presort p ;
LAGr_TriangleCount_Method m ;
for (int trial = 0 ; trial < ntrials ; trial++)
{
m = method ;
p = sorting ;
double tt = LAGraph_WallClockTime ( ) ;
LAGRAPH_TRY(LAGr_TriangleCount_GPU (&nt2, G, &m, &p, msg));
ttrial [trial] = LAGraph_WallClockTime ( ) - tt ;
ttot += ttrial [trial] ;
printf ("trial %2d: %12.6f sec rate %6.2f # triangles: "
"%g\n", trial, ttrial [trial],
1e-6 * nvals / ttrial [trial], (double) nt2) ;
}
ttot = ttot / ntrials ;
printf ("nthreads: %3d time: %12.6f rate: %6.2f", nthreads,
ttot, 1e-6 * nvals / ttot) ;
printf (" # of triangles: %" PRId64 " presort: %d\n",
ntriangles, (int) p) ;
if (nt2 != ntriangles)
{
printf ("Test failure!\n") ;
abort ( ) ;
}
fprintf (stderr, "\nMethod used: ") ;
print_method (stderr, m, p) ;
fprintf (stderr, "Avg: TC method%d.%d %3d: %10.3f sec: %s\n",
method, sorting, nthreads, ttot, matrix_name) ;
if (ttot < t_best)
{
t_best = ttot ;
method_best = method ;
nthreads_best = nthreads ;
sorting_best = sorting ;
gpu_best = with_gpu ;
}
}
}
}
printf ("\nBest method: GPU: %d ", gpu_best) ;
print_method (stdout, method_best, sorting_best) ;
printf ("nthreads: %3d time: %12.6f rate: %6.2f\n",
nthreads_best, t_best, 1e-6 * nvals / t_best) ;
LG_FREE_ALL ;
LAGRAPH_TRY (LAGraph_Finalize (msg)) ;
#endif
return (GrB_SUCCESS) ;
}