#include "hwy/aligned_allocator.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <array>
#include <random>
#include <set>
#include <vector>
#include "hwy/base.h"
#include "hwy/per_target.h"
#include "hwy/tests/hwy_gtest.h"
#include "hwy/tests/test_util-inl.h"
namespace {
// Test payload whose members (one pointer plus a char array) add up to N
// bytes. Records which constructor ran in data_[0] ('a' = default, 'b' =
// counting) and, when given a counter, keeps it equal to the number of live
// counted instances.
template <size_t N>
class SampleObject {
  static_assert(N > sizeof(int*), "SampleObject size too small.");

 public:
  // Default construction: no counter attached, marker 'a'.
  SampleObject() { data_[0] = 'a'; }

  // Counting construction: marker 'b'; increments *counter if it is non-null.
  explicit SampleObject(int* counter) : counter_(counter) {
    data_[0] = 'b';
    if (counter_ != nullptr) {
      ++*counter_;
    }
  }

  // Undoes the increment performed by the counting constructor, if any.
  ~SampleObject() {
    if (counter_ != nullptr) {
      --*counter_;
    }
  }

  // Public so that tests can inspect them directly.
  int* counter_ = nullptr;
  char data_[N - sizeof(int*)];
};
// Allocator exposing the (opaque pointer, ...) callback signatures that the
// tests pass to the aligned allocation functions. It forwards to malloc/free
// and tracks every outstanding allocation so tests can check that nothing
// leaked and that only pointers it handed out are freed.
class FakeAllocator {
 public:
  // Allocation callback. `opaque` must point to a FakeAllocator.
  static void* StaticAlloc(void* opaque, size_t bytes) {
    return static_cast<FakeAllocator*>(opaque)->Alloc(bytes);
  }

  // Deallocation callback. `opaque` must point to the FakeAllocator that
  // returned `memory`.
  static void StaticFree(void* opaque, void* memory) {
    static_cast<FakeAllocator*>(opaque)->Free(memory);
  }

  // Returns the number of allocations that have not been freed yet.
  size_t PendingAllocs() const { return allocs_.size(); }

 private:
  void* Alloc(size_t bytes) {
    void* ret = malloc(bytes);
    // Do not track a failed allocation: Free() ignores nullptr, so such an
    // entry could never be removed and PendingAllocs() would stay off by one.
    if (ret != nullptr) allocs_.insert(ret);
    return ret;
  }

  void Free(void* memory) {
    if (!memory) return;
    // Only pointers previously returned by Alloc() may be freed. Erasing via
    // the iterator avoids a second lookup.
    const auto it = allocs_.find(memory);
    HWY_ASSERT(it != allocs_.end());
    allocs_.erase(it);
    free(memory);
  }

  // Addresses returned by Alloc() that have not yet been passed to Free().
  std::set<void*> allocs_;
};
}
namespace hwy {
namespace {
// Empty fixture carrying the suite name used as the first argument of every
// TEST below. Only declared when tests are not built in standalone mode
// (presumably the standalone TEST macro does not need a fixture type; confirm
// in hwy_gtest.h).
#if !HWY_TEST_STANDALONE
class AlignedAllocatorTest : public testing::Test {};
#endif
// Freeing a null pointer must be harmless.
TEST(AlignedAllocatorTest, TestFreeNullptr) {
  // Null memory, no custom free callback and no opaque state: the call only
  // has to return without crashing.
  FreeAlignedBytes(nullptr, nullptr, nullptr);
}
// For the power-of-two inputs checked here, ShiftCount must return log2 of
// its argument.
TEST(AlignedAllocatorTest, TestLog2) {
  HWY_ASSERT_EQ(0u, detail::ShiftCount(1));  // 1 == 1 << 0
  HWY_ASSERT_EQ(1u, detail::ShiftCount(2));  // 2 == 1 << 1
  HWY_ASSERT_EQ(3u, detail::ShiftCount(8));  // 8 == 1 << 3
}
// Every request below needs more bytes (items * sizeof(T)) than size_t can
// represent, so the allocation is expected to fail by returning nullptr.
TEST(AlignedAllocatorTest, TestOverflow) {
  constexpr size_t kMaxSize = ~size_t(0);
  // Only the most significant bit set, i.e. (kMaxSize + 1) / 2.
  constexpr size_t kMsb = (kMaxSize >> 1) + 1;
  using Size5 = std::array<uint8_t, 5>;
  using Size10 = std::array<uint8_t, 10>;

  // Item counts close to the maximum, with element sizes of 4 and 5 bytes.
  HWY_ASSERT(nullptr == detail::AllocateAlignedItems<uint32_t>(
                            kMaxSize / 2, nullptr, nullptr));
  HWY_ASSERT(nullptr == detail::AllocateAlignedItems<uint32_t>(
                            kMaxSize / 3, nullptr, nullptr));
  HWY_ASSERT(nullptr == detail::AllocateAlignedItems<Size5>(
                            kMaxSize / 4, nullptr, nullptr));

  // Item counts built from the top bit: 2 * kMsb already wraps around.
  HWY_ASSERT(nullptr ==
             detail::AllocateAlignedItems<uint16_t>(kMsb, nullptr, nullptr));
  HWY_ASSERT(nullptr ==
             detail::AllocateAlignedItems<double>(kMsb + 1, nullptr, nullptr));
  HWY_ASSERT(nullptr ==
             detail::AllocateAlignedItems<Size10>(kMsb / 4, nullptr, nullptr));
}
// Allocates raw bytes without a custom allocator and checks that the result
// is non-null, aligned to HWY_ALIGNMENT and writable over its whole length.
TEST(AlignedAllocatorTest, TestAllocDefaultPointers) {
  const size_t kSize = 7777;
  // No allocation callback and no opaque pointer are supplied.
  void* ptr = AllocateAlignedBytes(kSize, nullptr, nullptr);
  HWY_ASSERT(ptr != nullptr);
  HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);

  // Touch every byte and fold neighbouring values into a sum, so that the
  // writes are actually observed by the final check.
  char* bytes = static_cast<char*>(ptr);
  size_t checksum = 0;
  bytes[0] = static_cast<char>(0);
  for (size_t i = 1; i < kSize; ++i) {
    bytes[i] = static_cast<char>(i & 0x7F);
    checksum += static_cast<size_t>(bytes[i] * bytes[i - 1]);
  }
  HWY_ASSERT(checksum != size_t{0});

  FreeAlignedBytes(ptr, nullptr, nullptr);
}
// Null AlignedUniquePtr instances (single-object and array form) must be
// constructible and destructible without touching any memory.
TEST(AlignedAllocatorTest, TestEmptyAlignedUniquePtr) {
  using Object = SampleObject<32>;
  AlignedUniquePtr<Object> ptr(nullptr, AlignedDeleter());
  AlignedUniquePtr<Object[]> arr(nullptr, AlignedDeleter());
}
// Null AlignedFreeUniquePtr instances (single-object and array form) must be
// constructible and destructible without touching any memory.
TEST(AlignedAllocatorTest, TestEmptyAlignedFreeUniquePtr) {
  using Chars = std::array<char, 32>;
  AlignedFreeUniquePtr<Chars> ptr(nullptr, AlignedFreer());
  AlignedFreeUniquePtr<Chars[]> arr(nullptr, AlignedFreer());
}
// Routes a raw allocation through FakeAllocator and checks that exactly one
// underlying allocation is made, that the result is aligned, and that freeing
// hands the memory back to the same allocator.
TEST(AlignedAllocatorTest, TestCustomAlloc) {
  FakeAllocator fake_alloc;
  const size_t kSize = 7777;

  void* ptr =
      AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc);
  HWY_ASSERT(ptr != nullptr);
  HWY_ASSERT_EQ(1U, fake_alloc.PendingAllocs());
  HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);

  FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc);
  HWY_ASSERT_EQ(0U, fake_alloc.PendingAllocs());
}
// MakeUniqueAligned without arguments must run the default constructor,
// which leaves the counter unset and writes the 'a' marker.
TEST(AlignedAllocatorTest, TestMakeUniqueAlignedDefaultConstructor) {
  auto ptr = MakeUniqueAligned<SampleObject<24>>();
  HWY_ASSERT_EQ('a', ptr->data_[0]);
  HWY_ASSERT(nullptr == ptr->counter_);
}
// MakeUniqueAligned must forward its argument to the constructor, and the
// returned pointer must run the destructor when it goes out of scope.
TEST(AlignedAllocatorTest, TestMakeUniqueAligned) {
  int counter = 0;
  {
    auto ptr = MakeUniqueAligned<SampleObject<24>>(&counter);
    // The counting constructor ran exactly once.
    HWY_ASSERT_EQ(1, counter);
    HWY_ASSERT_EQ('b', ptr->data_[0]);
  }
  // Leaving the scope destroyed the object, which decremented the counter.
  HWY_ASSERT_EQ(0, counter);
}
// MakeUniqueAlignedArray must construct every element with the forwarded
// argument and destroy every element when the array goes out of scope.
TEST(AlignedAllocatorTest, TestMakeUniqueAlignedArray) {
  constexpr size_t kNumItems = 7;
  int counter = 0;
  {
    auto arr = MakeUniqueAlignedArray<SampleObject<24>>(kNumItems, &counter);
    // One increment per constructed element.
    HWY_ASSERT_EQ(7, counter);
    for (size_t idx = 0; idx < kNumItems; ++idx) {
      HWY_ASSERT_EQ('b', arr[idx].data_[0]);
    }
  }
  // All element destructors ran.
  HWY_ASSERT_EQ(0, counter);
}
// A one-element AllocateAligned request must yield a non-null, aligned
// pointer that can be released explicitly via reset().
TEST(AlignedAllocatorTest, TestAllocSingleInt) {
  auto ptr = AllocateAligned<uint32_t>(1);
  HWY_ASSERT(ptr.get() != nullptr);
  HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);

  // After the explicit reset the smart pointer must be empty.
  ptr.reset(nullptr);
  HWY_ASSERT(nullptr == ptr.get());
}
// A multi-element AllocateAligned request must yield a non-null, aligned,
// contiguous array that is writable over its whole length.
TEST(AlignedAllocatorTest, TestAllocMultipleInt) {
  const size_t kSize = 7777;
  auto ptr = AllocateAligned<uint32_t>(kSize);
  HWY_ASSERT(ptr.get() != nullptr);
  HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);
  // Consecutive elements must be adjacent in memory.
  HWY_ASSERT(&(ptr[0]) + 1 == &(ptr[1]));

  // Write every element and fold neighbouring values into a sum, so that the
  // writes are actually observed by the final check.
  size_t checksum = 0;
  ptr[0] = static_cast<uint32_t>(0);
  for (size_t i = 1; i < kSize; ++i) {
    ptr[i] = static_cast<uint32_t>(i);
    checksum += static_cast<size_t>(ptr[i]) * ptr[i - 1];
  }
  HWY_ASSERT(checksum != size_t{0});
}
// Array creation through FakeAllocator: exactly one underlying allocation
// must back the whole array, every element must be constructed and later
// destroyed, and the memory must be returned to the allocator.
TEST(AlignedAllocatorTest, TestMakeUniqueAlignedArrayWithCustomAlloc) {
  constexpr size_t kNumItems = 7;
  FakeAllocator fake_alloc;
  int counter = 0;
  {
    auto arr = MakeUniqueAlignedArrayWithAlloc<SampleObject<24>>(
        kNumItems, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree,
        &fake_alloc, &counter);
    HWY_ASSERT(arr.get() != nullptr);
    HWY_ASSERT_EQ(1u, fake_alloc.PendingAllocs());
    // One increment per constructed element.
    HWY_ASSERT_EQ(7, counter);
    for (size_t idx = 0; idx < kNumItems; ++idx) {
      HWY_ASSERT_EQ('b', arr[idx].data_[0]);
    }
  }
  // Leaving the scope destroyed all elements and released the allocation.
  HWY_ASSERT_EQ(0, counter);
  HWY_ASSERT_EQ(0u, fake_alloc.PendingAllocs());
}
// Both smart pointer types must be default-constructible (as required by
// vector::resize) and assignable from freshly allocated arrays.
TEST(AlignedAllocatorTest, TestDefaultInit) {
  constexpr size_t kNumPtrs = 128;
  std::vector<AlignedUniquePtr<int[]>> ptrs;
  std::vector<AlignedFreeUniquePtr<double[]>> free_ptrs;
  ptrs.resize(kNumPtrs);
  free_ptrs.resize(kNumPtrs);

  // Fixed seed, so the chosen slots are the same on every run.
  std::mt19937 rng(129);
  std::uniform_int_distribution<size_t> dist(0, kNumPtrs - 1);

  // Store one real allocation in a pseudo-randomly chosen slot of each vector.
  ptrs[dist(rng)] = MakeUniqueAlignedArray<int>(123);
  free_ptrs[dist(rng)] = AllocateAligned<double>(456);

  // Read back one (possibly still empty) slot of each vector.
  const auto addr1 = reinterpret_cast<uintptr_t>(ptrs[dist(rng)].get());
  const auto addr2 = reinterpret_cast<uintptr_t>(free_ptrs[dist(rng)].get());

  // Shifting right by kBits - 1 twice always yields zero, so this comparison
  // cannot fail; presumably it only exists to make the loads above observable
  // without shifting by the full bit width in one step.
  constexpr size_t kBits = sizeof(uintptr_t) * 8;
  HWY_ASSERT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1),
                (addr2 >> (kBits - 1)) >> (kBits - 1));
}
using std::array;
using std::vector;
// Asserts that two containers of the same type have the same size and
// element-wise equal contents. T must provide size() and operator[].
template <typename T>
void CheckEqual(const T& t1, const T& t2) {
  HWY_ASSERT_EQ(t1.size(), t2.size());
  size_t pos = 0;
  while (pos < t1.size()) {
    HWY_ASSERT_EQ(t1[pos], t2[pos]);
    ++pos;
  }
}
// Asserts that the 1-D array `a` has the same shape and contents as `v`.
// Elements are read both through Span::operator[] and through the raw data
// pointer, so that the two access paths are checked against each other.
template <typename T>
void CheckEqual(const AlignedNDArray<T, 1>& a, const vector<T>& v) {
  const array<size_t, 1> want_shape({v.size()});
  const array<size_t, 1> got_shape = a.shape();
  CheckEqual(got_shape, want_shape);

  // An empty index list selects the whole (only) axis.
  Span<const T> a_span = a[{}];
  HWY_ASSERT_EQ(a_span.size(), v.size());
  for (size_t pos = 0; pos < a_span.size(); ++pos) {
    const T* const raw = a_span.data();
    HWY_ASSERT_EQ(a_span[pos], v[pos]);
    HWY_ASSERT_EQ(raw[pos], v[pos]);
  }
}
// Asserts that the 2-D array `a` has the same shape and contents as the
// row-major nested vector `v`. All rows of `v` must have the same length.
// Elements are read through a cached row span, through a freshly indexed row
// and through the raw row pointer, so that all access paths are checked.
template <typename T>
void CheckEqual(const AlignedNDArray<T, 2>& a, const vector<vector<T>>& v) {
  // Take the expected column count from the first row (zero if there are no
  // rows); indexing a fixed later row would be out of bounds for a
  // single-row expectation.
  const size_t want_columns = v.empty() ? size_t{0} : v.front().size();
  const array<size_t, 2> want_shape({v.size(), want_columns});
  for (const vector<T>& row : v) {
    HWY_ASSERT_EQ(row.size(), want_shape[1]);
  }
  const std::array<size_t, 2> got_shape = a.shape();
  CheckEqual(got_shape, want_shape);
  HWY_ASSERT_EQ(a.size(), want_shape[0] * want_shape[1]);

  for (size_t row_index = 0; row_index < v.size(); ++row_index) {
    // Bind by reference: the expected row is only read.
    const vector<T>& want_row = v[row_index];
    Span<const T> got_row = a[{row_index}];
    HWY_ASSERT_EQ(got_row.size(), want_row.size());
    for (size_t column_index = 0; column_index < got_row.size();
         column_index++) {
      HWY_ASSERT_EQ(a[{row_index}][column_index], want_row[column_index]);
      HWY_ASSERT_EQ(got_row[column_index], want_row[column_index]);
      HWY_ASSERT_EQ(*(a[{row_index}].data() + column_index),
                    want_row[column_index]);
    }
  }
}
// Basic AlignedNDArray behaviour for 1-D and 2-D arrays: a new array compares
// equal to all zeros, single elements can be written through operator[], and
// whole rows can be filled through the row's data pointer.
TEST(AlignedAllocatorTest, TestAlignedNDArray) {
  // 1-D: starts zeroed, then one element is overwritten.
  AlignedNDArray<float, 1> a1({4});
  CheckEqual(a1, {0, 0, 0, 0});
  a1[{}][2] = 3.4f;
  CheckEqual(a1, {0, 0, 3.4f, 0});

  // 2-D: starts zeroed, then one element is overwritten.
  AlignedNDArray<float, 2> a2({2, 3});
  CheckEqual(a2, {{0, 0, 0}, {0, 0, 0}});
  a2[{1}][1] = 5.1f;
  CheckEqual(a2, {{0, 0, 0}, {0, 5.1f, 0}});

  // Overwrite both rows with raw byte copies.
  float row0[] = {1.0f, 2.0f, 3.0f};
  float row1[] = {4.0f, 5.0f, 6.0f};
  hwy::CopyBytes(row0, a2[{0}].data(), sizeof(row0));
  hwy::CopyBytes(row1, a2[{1}].data(), sizeof(row1));
  CheckEqual(a2, {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
}
// The start of every innermost row of a 4-D array must be aligned to
// VectorBytes(), even though the requested last-axis length (3 floats) is not
// a multiple of it.
TEST(AlignedAllocatorTest, TestAlignedNDArrayAlignment) {
  AlignedNDArray<float, 4> a({3, 3, 3, 3});
  const auto extents = a.shape();
  for (size_t d0 = 0; d0 < extents[0]; ++d0) {
    for (size_t d1 = 0; d1 < extents[1]; ++d1) {
      for (size_t d2 = 0; d2 < extents[2]; ++d2) {
        HWY_ASSERT_EQ(
            reinterpret_cast<uintptr_t>(a[{d0, d1, d2}].data()) % VectorBytes(),
            0);
      }
    }
  }
}
// Assigning a braced list to a row obtained via operator[] must write the
// values into the array's storage.
TEST(AlignedAllocatorTest, TestSpanCopyAssignment) {
  AlignedNDArray<float, 2> a({2, 2});
  CheckEqual(a, {{0.0f, 0.0f}, {0.0f, 0.0f}});

  // Overwrite each row in turn, then verify the whole array.
  a[{0}] = {1.0f, 2.0f};
  a[{1}] = {3.0f, 4.0f};
  CheckEqual(a, {{1.0f, 2.0f}, {3.0f, 4.0f}});
}
// truncate() must shrink the logical shape while leaving the memory shape
// and the values of all remaining elements untouched.
TEST(AlignedAllocatorTest, TestAlignedNDArrayTruncate) {
  AlignedNDArray<size_t, 4> a({8, 8, 8, 8});
  // Read the last axis' memory extent from the array instead of assuming 8
  // (presumably it can be padded; confirm against AlignedNDArray).
  const size_t last_axis_memory_shape = a.memory_shape()[3];

  // Row-major linear index within the original 8x8x8x8 grid. Each axis gets a
  // distinct weight so that every index maps to a unique value; otherwise
  // elements swapped between axes could go unnoticed.
  const auto compute_value = [](const std::array<size_t, 4>& index) {
    return index[0] * 8 * 8 * 8 + index[1] * 8 * 8 + index[2] * 8 + index[3];
  };

  // Fill the whole array with its per-index values.
  for (size_t axis0 = 0; axis0 < a.shape()[0]; ++axis0) {
    for (size_t axis1 = 0; axis1 < a.shape()[1]; ++axis1) {
      for (size_t axis2 = 0; axis2 < a.shape()[2]; ++axis2) {
        for (size_t axis3 = 0; axis3 < a.shape()[3]; ++axis3) {
          a[{axis0, axis1, axis2}][axis3] =
              compute_value({axis0, axis1, axis2, axis3});
        }
      }
    }
  }

  // Checks that every element within the current logical shape still holds
  // the value written above.
  const auto verify_values = [&](const AlignedNDArray<size_t, 4>& array) {
    for (size_t axis0 = 0; axis0 < array.shape()[0]; ++axis0) {
      for (size_t axis1 = 0; axis1 < array.shape()[1]; ++axis1) {
        for (size_t axis2 = 0; axis2 < array.shape()[2]; ++axis2) {
          for (size_t axis3 = 0; axis3 < array.shape()[3]; ++axis3) {
            HWY_ASSERT_EQ((array[{axis0, axis1, axis2}][axis3]),
                          (compute_value({axis0, axis1, axis2, axis3})));
          }
        }
      }
    }
  };

  // Uniform truncation by one along every axis.
  a.truncate({7, 7, 7, 7});
  HWY_ASSERT_EQ(a.shape()[0], 7);
  HWY_ASSERT_EQ(a.shape()[1], 7);
  HWY_ASSERT_EQ(a.shape()[2], 7);
  HWY_ASSERT_EQ(a.shape()[3], 7);
  HWY_ASSERT_EQ(a.memory_shape()[0], 8);
  HWY_ASSERT_EQ(a.memory_shape()[1], 8);
  HWY_ASSERT_EQ(a.memory_shape()[2], 8);
  HWY_ASSERT_EQ(a.memory_shape()[3], last_axis_memory_shape);
  verify_values(a);

  // Further truncation by a different amount along each axis.
  a.truncate({6, 5, 4, 3});
  HWY_ASSERT_EQ(a.shape()[0], 6);
  HWY_ASSERT_EQ(a.shape()[1], 5);
  HWY_ASSERT_EQ(a.shape()[2], 4);
  HWY_ASSERT_EQ(a.shape()[3], 3);
  HWY_ASSERT_EQ(a.memory_shape()[0], 8);
  HWY_ASSERT_EQ(a.memory_shape()[1], 8);
  HWY_ASSERT_EQ(a.memory_shape()[2], 8);
  HWY_ASSERT_EQ(a.memory_shape()[3], last_axis_memory_shape);
  verify_values(a);
}
// Exercises AlignedVector through the usual vector operations: element
// access, pop_back/push_back, growth beyond the initial capacity, and clear.
TEST(AlignedAllocatorTest, TestAlignedVector) {
  AlignedVector<int> vec{0, 1, 2, 3, 4};

  // Element access on the initial contents.
  HWY_ASSERT_EQ(5, vec.size());
  HWY_ASSERT_EQ(0, vec[0]);
  HWY_ASSERT_EQ(2, vec.at(2));
  HWY_ASSERT_EQ(0, vec.front());
  HWY_ASSERT_EQ(4, vec.back());

  // Removing the last element exposes the previous one.
  vec.pop_back();
  HWY_ASSERT_EQ(3, vec.back());
  HWY_ASSERT_EQ(4, vec.size());

  // Appending restores and extends the sequence 0, 1, 2, ...
  vec.push_back(4);
  vec.push_back(5);
  HWY_ASSERT_EQ(5, vec.back());
  HWY_ASSERT_EQ(6, vec.size());

  // Keep appending the next index until the size exceeds the initial
  // capacity by 10, which forces at least one reallocation.
  const size_t initialCapacity = vec.capacity();
  while (vec.size() < initialCapacity + 10) {
    vec.push_back(static_cast<int>(vec.size()));
  }
  HWY_ASSERT(vec.capacity() > initialCapacity);

  // Every element must equal its index, including those moved by regrowth.
  for (size_t i = 0; i < vec.size(); ++i) {
    HWY_ASSERT_EQ(i, vec[i]);
  }

  vec.clear();
  HWY_ASSERT(vec.empty());
}
} }
HWY_TEST_MAIN();