#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "hwy/aligned_allocator.h"
#include "hwy/base.h"
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "hwy/contrib/dot/dot_test.cc"
#include "hwy/foreach_target.h"
#include "hwy/highway.h"
#include "hwy/contrib/dot/dot-inl.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
namespace {
// Scalar reference dot product used to validate the vectorized result.
// Both operands of each product are first converted to float; products are
// accumulated in a float in increasing index order, and the total is converted
// to T1 on return. With num == 0 the result is ConvertScalarTo<T1>(0.0f).
template <typename T1, typename T2>
HWY_NOINLINE T1 SimpleDot(const T1* pa, const T2* pb, size_t num) {
  float total = 0.0f;
  for (size_t idx = 0; idx != num; ++idx) {
    const float lhs = ConvertScalarTo<float>(pa[idx]);
    const float rhs = ConvertScalarTo<float>(pb[idx]);
    // Multiply and accumulate in a single expression.
    total += lhs * rhs;
  }
  return ConvertScalarTo<T1>(total);
}
// Scalar reference dot product for float x bfloat16_t inputs. Each pb element
// is widened via F32FromBF16 before the multiply; the float accumulator is
// returned as-is.
HWY_MAYBE_UNUSED HWY_NOINLINE float SimpleDot(const float* pa,
                                              const hwy::bfloat16_t* pb,
                                              size_t num) {
  float total = 0.0f;
  for (size_t idx = 0; idx != num; ++idx) {
    const float rhs = F32FromBF16(pb[idx]);
    // Multiply and accumulate in a single expression.
    total += pa[idx] * rhs;
  }
  return total;
}
// Scalar reference dot product for bfloat16_t x bfloat16_t inputs. Both
// operands are widened via F32FromBF16. Unlike the generic template, which
// converts its total back to T1, this overload returns the float accumulator
// directly, so the sum is not narrowed to bfloat16_t.
HWY_MAYBE_UNUSED HWY_NOINLINE float SimpleDot(const bfloat16_t* pa,
                                              const bfloat16_t* pb,
                                              size_t num) {
  float total = 0.0f;
  for (size_t idx = 0; idx != num; ++idx) {
    const float lhs = F32FromBF16(pa[idx]);
    const float rhs = F32FromBF16(pb[idx]);
    // Multiply and accumulate in a single expression.
    total += lhs * rhs;
  }
  return total;
}
class TestDot {
template <int kAssumptions, class D>
void Test(D d, size_t num, size_t misalign_a, size_t misalign_b,
RandomState& rng) {
using T = TFromD<D>;
const size_t N = Lanes(d);
const auto random_t = [&rng]() {
const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;
return static_cast<float>(bits - 512) * (1.0f / 64);
};
const size_t padded =
(kAssumptions & Dot::kPaddedToVector) ? RoundUpTo(num, N) : num;
AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + padded);
AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(misalign_b + padded);
HWY_ASSERT(pa && pb);
T* a = pa.get() + misalign_a;
T* b = pb.get() + misalign_b;
size_t i = 0;
for (; i < num; ++i) {
a[i] = ConvertScalarTo<T>(random_t());
b[i] = ConvertScalarTo<T>(random_t());
}
for (; i < padded; ++i) {
a[i] = ConvertScalarTo<T>(0);
b[i] = ConvertScalarTo<T>(0);
}
const double expected = SimpleDot(a, b, num);
const double magnitude = expected > 0.0 ? expected : -expected;
const double actual =
ConvertScalarTo<double>(Dot::Compute<kAssumptions>(d, a, b, num));
const double max = static_cast<double>(8 * 8 * num);
HWY_ASSERT(-max <= actual && actual <= max);
const double tolerance =
IsFloat<T>() ? 96.0 * ConvertScalarTo<double>(Epsilon<T>()) *
HWY_MAX(magnitude, 1.0)
: 0;
HWY_ASSERT(expected - tolerance <= actual &&
actual <= expected + tolerance);
}
template <int kAssumptions, class D>
void ForeachMisalign(D d, size_t num, RandomState& rng) {
const size_t N = Lanes(d);
const size_t misalignments[3] = {0, N / 4, 3 * N / 5};
for (size_t ma : misalignments) {
for (size_t mb : misalignments) {
Test<kAssumptions>(d, num, ma, mb, rng);
}
}
}
template <int kAssumptions, class D>
void ForeachCount(D d, RandomState& rng) {
const size_t N = Lanes(d);
const size_t counts[] = {1,
3,
7,
16,
HWY_MAX(N / 2, 1),
HWY_MAX(2 * N / 3, 1),
N,
N + 1,
4 * N / 3,
3 * N,
8 * N,
8 * N + 2};
for (size_t num : counts) {
if ((kAssumptions & Dot::kAtLeastOneVector) && num < N) continue;
if ((kAssumptions & Dot::kMultipleOfVector) && (num % N) != 0) continue;
ForeachMisalign<kAssumptions>(d, num, rng);
}
}
public:
template <class T, class D>
HWY_INLINE void operator()(T , D d) {
RandomState rng;
ForeachCount<0>(d, rng);
ForeachCount<Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kMultipleOfVector>(d, rng);
ForeachCount<Dot::kMultipleOfVector | Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kPaddedToVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector |
Dot::kAtLeastOneVector>(d, rng);
}
};
class TestDotF32BF16 {
template <int kAssumptions, class D>
void Test(D d, size_t num, size_t misalign_a, size_t misalign_b,
RandomState& rng) {
using T = TFromD<D>;
using T2 = hwy::bfloat16_t;
const size_t N = Lanes(d);
const auto random_t = [&rng]() {
const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;
return static_cast<float>(bits - 512) * (1.0f / 64);
};
const size_t padded =
(kAssumptions & Dot::kPaddedToVector) ? RoundUpTo(num, N) : num;
AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + padded);
AlignedFreeUniquePtr<T2[]> pb = AllocateAligned<T2>(misalign_b + padded);
HWY_ASSERT(pa && pb);
T* a = pa.get() + misalign_a;
T2* b = pb.get() + misalign_b;
size_t i = 0;
for (; i < num; ++i) {
a[i] = ConvertScalarTo<T>(random_t());
b[i] = ConvertScalarTo<T2>(random_t());
}
for (; i < padded; ++i) {
ScalableTag<float> df1;
a[i] = ConvertScalarTo<T>(GetLane(NaN(df1)));
b[i] = ConvertScalarTo<T2>(GetLane(NaN(df1)));
}
const double expected = SimpleDot(a, b, num);
const double magnitude = expected > 0.0 ? expected : -expected;
const double actual =
ConvertScalarTo<double>(Dot::Compute<kAssumptions>(d, a, b, num));
const double max = static_cast<double>(8 * 8 * num);
HWY_ASSERT(-max <= actual && actual <= max);
const double tolerance =
64.0 * ConvertScalarTo<double>(Epsilon<T2>()) * HWY_MAX(magnitude, 1.0);
HWY_ASSERT(expected - tolerance <= actual &&
actual <= expected + tolerance);
}
template <int kAssumptions, class D>
void ForeachMisalign(D d, size_t num, RandomState& rng) {
const size_t N = Lanes(d);
const size_t misalignments[3] = {0, N / 4, 3 * N / 5};
for (size_t ma : misalignments) {
for (size_t mb : misalignments) {
Test<kAssumptions>(d, num, ma, mb, rng);
}
}
}
template <int kAssumptions, class D>
void ForeachCount(D d, RandomState& rng) {
const size_t N = Lanes(d);
const size_t counts[] = {1,
3,
7,
16,
HWY_MAX(N / 2, 1),
HWY_MAX(2 * N / 3, 1),
N,
N + 1,
4 * N / 3,
3 * N,
8 * N,
8 * N + 2};
for (size_t num : counts) {
if ((kAssumptions & Dot::kAtLeastOneVector) && num < N) continue;
if ((kAssumptions & Dot::kMultipleOfVector) && (num % N) != 0) continue;
ForeachMisalign<kAssumptions>(d, num, rng);
}
}
public:
template <class T, class D>
HWY_INLINE void operator()(T , D d) {
RandomState rng;
ForeachCount<0>(d, rng);
ForeachCount<Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kMultipleOfVector>(d, rng);
ForeachCount<Dot::kMultipleOfVector | Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kPaddedToVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kAtLeastOneVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector>(d, rng);
ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector |
Dot::kAtLeastOneVector>(d, rng);
}
};
void TestAllDot() { ForFloatTypes(ForPartialVectors<TestDot>()); }
void TestAllDotF32BF16() { ForPartialVectors<TestDotF32BF16>()(float()); }
void TestAllDotBF16() { ForShrinkableVectors<TestDot>()(bfloat16_t()); }
void TestAllDotI16() { ForShrinkableVectors<TestDot>()(int16_t()); }
} } } HWY_AFTER_NAMESPACE();
// Test-suite registration and the test main; compiled only when HWY_ONCE is
// nonzero.
// NOTE(review): presumably this guard exists because foreach_target.h
// re-includes this file (named by HWY_TARGET_INCLUDE) once per target, and the
// registration must be emitted only once - confirm.
#if HWY_ONCE
namespace hwy {
namespace {
HWY_BEFORE_TEST(DotTest);
// One registration per TestAll* function defined in the per-target namespace.
HWY_EXPORT_AND_TEST_P(DotTest, TestAllDot);
HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotF32BF16);
HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotBF16);
HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotI16);
HWY_AFTER_TEST();
} } HWY_TEST_MAIN();
#endif