#include "HCheckConfig.h"
#if 0#endif
#include <iostream>
#include "catch.hpp"
#include "matrix_multiplication.hpp"
#include "parallel/HighsParallel.h"
using namespace highs;
// Number of worker threads passed to parallel::initialize_scheduler() by the
// test cases in this file: half of the reported hardware concurrency, rounded
// up.
// NOTE(review): std::thread::hardware_concurrency() is permitted to return 0
// when the value cannot be determined, which would make this 0 as well —
// confirm the scheduler copes with that.
const int numThreads = (std::thread::hardware_concurrency() + 1) / 2;
// When true, the tests print their timing information to std::cout; with the
// value false every `if (dev_run)` output branch in this file is skipped.
const bool dev_run = false;
// Sequential reference for the recurrence used by the Fibonacci test:
//   f(n) = 1                      for n <= 1
//   f(n) = f(n - 1) + f(n - 2)    otherwise
// Evaluated by plain double recursion without any memoisation.
int64_t fib_sequential(const int64_t n) {
  if (n > 1) {
    const int64_t one_back = fib_sequential(n - 1);
    const int64_t two_back = fib_sequential(n - 2);
    return one_back + two_back;
  }
  // Base case: every n <= 1 (including negative values) yields 1.
  return 1;
}
// Task-parallel counterpart of fib_sequential(): fib(n - 1) is handed to
// parallel::spawn() while fib(n - 2) is evaluated by the calling code, and the
// two partial results are added after parallel::sync().
// Returns the same value as fib_sequential(n).
int64_t fib(const int64_t n) {
// Inputs up to 20 are evaluated directly with the sequential recursion; no
// task is spawned for them.
if (n <= 20) return fib_sequential(n);
// Intentionally left uninitialized: the only write happens inside the spawned
// lambda below, and the value is read only after parallel::sync().
int64_t n1;
parallel::spawn([&]() {
n1 = fib(n - 1);
// Presumably a ThreadSanitizer annotation (macro defined outside this file)
// that publishes the write to n1; it is paired with the HAPPENS_AFTER below.
TSAN_ANNOTATE_HAPPENS_BEFORE(&n1);
});
// The second branch of the recursion runs in the calling code.
int64_t n2 = fib(n - 2);
TSAN_ANNOTATE_HAPPENS_BEFORE(&n2);
// NOTE(review): presumably blocks until the task spawned above has finished —
// confirm against parallel/HighsParallel.h. n1 must not be read before this.
parallel::sync();
TSAN_ANNOTATE_HAPPENS_AFTER(&n1);
TSAN_ANNOTATE_HAPPENS_AFTER(&n2);
return n1 + n2;
}
#if 0#endif
// Fills the global N x N matrices a and b, clears c and then accumulates the
// product a * b into c. Each of the four phases is its own
// parallel::for_each() call over the row range [0, N); the callback is given a
// [first, last) range of rows to process.
// The nthreads argument is not used by this implementation.
void matrix_multiplication_highs(unsigned nthreads) {
  // Phase 1: a[row][col] = row + col
  parallel::for_each(0, N, [&](HighsInt first, HighsInt last) {
    for (int row = first; row < last; ++row) {
      for (int col = 0; col < N; ++col) {
        a[row][col] = row + col;
      }
    }
  });

  // Phase 2: b[row][col] = row * col
  parallel::for_each(0, N, [&](HighsInt first, HighsInt last) {
    for (int row = first; row < last; ++row) {
      for (int col = 0; col < N; ++col) {
        b[row][col] = row * col;
      }
    }
  });

  // Phase 3: reset the result matrix
  parallel::for_each(0, N, [&](HighsInt first, HighsInt last) {
    for (int row = first; row < last; ++row) {
      for (int col = 0; col < N; ++col) {
        c[row][col] = 0;
      }
    }
  });

  // Phase 4: c[row][col] += a[row][inner] * b[inner][col], with the inner
  // index running in ascending order
  parallel::for_each(0, N, [&](HighsInt first, HighsInt last) {
    for (int row = first; row < last; ++row) {
      for (int col = 0; col < N; ++col) {
        for (int inner = 0; inner < N; ++inner) {
          c[row][col] += a[row][inner] * b[inner][col];
        }
      }
    }
  });
}
#if 0#endif
// Runs matrix_multiplication_highs(num_threads) once and returns the elapsed
// time, taken from std::chrono::high_resolution_clock and converted to whole
// microseconds with duration_cast.
std::chrono::microseconds measure_time_highs(unsigned num_threads) {
  using Clock = std::chrono::high_resolution_clock;

  const Clock::time_point started = Clock::now();
  matrix_multiplication_highs(num_threads);
  const Clock::time_point finished = Clock::now();

  const auto elapsed = finished - started;
  return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
}
// Shared state of the matrix multiplication benchmark.
// N is the current matrix dimension; matrix_multiplication() assigns it before
// each call to allocate_matrix().
int N = 0;
// The three matrices, indexed as [row][col]. They start out as null pointers;
// presumably allocate_matrix()/deallocate_matrix() manage their storage —
// confirm in matrix_multiplication.hpp.
double** a = nullptr;
double** b = nullptr;
double** c = nullptr;
void matrix_multiplication(const std::string& model, const unsigned num_threads,
const unsigned num_rounds) {
if (dev_run)
std::cout << std::setw(12) << "size" << std::setw(12) << "runtime"
<< std::endl;
for (int i = 128; i <= 512; i += 128) {
N = i;
allocate_matrix();
double runtime{0.0};
for (unsigned j = 0; j < num_rounds; ++j) {
#if 0#else
runtime += measure_time_highs(num_threads).count();
#endif
}
if (dev_run)
std::cout << std::setw(12) << N << std::setw(12)
<< runtime / num_rounds / 1e3 << std::endl;
deallocate_matrix();
}
}
// Runs a single round of the matrix multiplication benchmark on a scheduler
// that is initialized with numThreads just for this test case.
TEST_CASE("MatrixMultHighs", "[parallel]") {
  // The executor is shut down before the scheduler is (re)initialized and
  // again once the benchmark has finished.
  HighsTaskExecutor::shutdown();
  parallel::initialize_scheduler(numThreads);

  if (dev_run) {
    std::cout << "\nhighs workstealing for loop:" << std::endl;
  }

  const unsigned rounds = 1;
  matrix_multiplication("highs", parallel::num_threads(), rounds);

  HighsTaskExecutor::shutdown();
}
// Evaluates fib(41) with the task-parallel fib() on a scheduler initialized
// with numThreads and checks the result against the expected value.
TEST_CASE("FibonacciTasksHighs", "[parallel]") {
  using Clock = std::chrono::high_resolution_clock;

  HighsTaskExecutor::shutdown();

  // The start time is taken before the scheduler is initialized, so the
  // reported duration covers initialize_scheduler() as well as fib(41).
  const auto started = Clock::now();
  parallel::initialize_scheduler(numThreads);
  const int64_t result = fib(41);
  const auto finished = Clock::now();

  if (dev_run) {
    const auto elapsed_us =
        std::chrono::duration_cast<std::chrono::microseconds>(finished -
                                                              started)
            .count();
    std::cout << "time elapsed for fib(41) with HiGHS work stealing: "
              << (elapsed_us / 1e3) << "ms" << std::endl;
  }

  REQUIRE(result == 267914296);
  HighsTaskExecutor::shutdown();
}
#if 0#endif