#include "hwy/aligned_allocator.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <atomic>
#include <limits>
#include "hwy/base.h"
namespace hwy {
namespace {
// kAlignment: every payload pointer returned by AllocateAlignedBytes is a
// multiple of this value (checked via HWY_DASSERT in the functions below).
// On RISC-V builds with vector intrinsics version >= 11000 it is raised to at
// least 4096; everywhere else it is exactly HWY_ALIGNMENT.
// NOTE(review): the reason for the 4096 floor on RISC-V is not visible in this
// file - confirm before changing.
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
#else
constexpr size_t kAlignment = HWY_ALIGNMENT;
#endif
// kAlias: allocations are first rounded up to a multiple of kAlias and then
// displaced by a cyclic multiple of kAlignment (see NextAlignedOffset), so
// there are kAlias / kAlignment distinct placements. Must be a power of two
// and a multiple of kAlignment (enforced by static_asserts further down).
// On x86 it is at least 1024; elsewhere it equals kAlignment, which leaves a
// single placement.
// NOTE(review): presumably the x86 value spreads allocations to reduce address
// aliasing between buffers - confirm against the project documentation.
#if HWY_ARCH_X86
constexpr size_t kAlias = HWY_MAX(kAlignment, 1024);
#else
constexpr size_t kAlias = kAlignment;
#endif
// Bookkeeping record stored immediately before each payload returned by
// AllocateAlignedBytes. Packed to 1-byte alignment so that no padding is
// inserted and the record can sit directly in front of the payload address.
#pragma pack(push, 1)
struct AllocationHeader {
// Pointer originally returned by malloc / the user allocator; this is the
// value handed back to free / the user deallocator.
void* allocated;
// Payload size in bytes as requested by the caller; forwarded to the array
// deleter in AlignedDeleter::DeleteAlignedArray.
size_t payload_size;
};
#pragma pack(pop)
// Returns the next offset in a repeating cycle 0, kAlignment, 2 * kAlignment,
// ..., kAlias - kAlignment. Each call advances a process-wide counter, so
// successive allocations receive different displacements within a kAlias
// window. The result is always a multiple of kAlignment.
size_t NextAlignedOffset() {
  static_assert(kAlias % kAlignment == 0, "kAlias must be a multiple");
  // Number of distinct kAlignment-sized slots inside one kAlias window.
  constexpr size_t kNumSlots = kAlias / kAlignment;

  // Only the returned counter value is consumed, so relaxed ordering is used.
  static std::atomic<size_t> counter{0};
  const size_t ticket = counter.fetch_add(1, std::memory_order_relaxed);

  const size_t result = kAlignment * (ticket % kNumSlots);
  HWY_DASSERT((result % kAlignment == 0) && result <= kAlias);
  return result;
}
}
// Allocates `payload_size` bytes whose start address is a multiple of
// kAlignment, and records the bookkeeping needed to release them later.
//
// payload_size: number of usable bytes requested; must be nonzero (asserted).
// alloc_ptr:    optional user allocator; when null, malloc is used.
// opaque_ptr:   passed through unchanged as the first argument of alloc_ptr.
//
// Returns the aligned payload pointer, or nullptr if the request is too large
// (>= half of the size_t range) or the underlying allocator fails.
//
// Memory layout of one allocation:
//   | misalign   | unused ... AllocationHeader | payload      |
//   | <= kAlias  | offset                      | payload_size |
//   ^allocated   ^aligned                      ^payload
HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
                                         AllocPtr alloc_ptr, void* opaque_ptr) {
  HWY_ASSERT(payload_size != 0);

  // Rejecting huge sizes up front keeps the size arithmetic below from
  // wrapping around.
  constexpr size_t kTooLarge = std::numeric_limits<size_t>::max() / 2;
  if (payload_size >= kTooLarge) {
    HWY_DASSERT(false && "payload_size too large");
    return nullptr;
  }

  // The header lives at the end of the `offset` region, so that region must
  // not be empty: a zero cyclic offset is replaced by the smallest multiple
  // of kAlignment that can hold the header.
  const size_t cyclic_offset = NextAlignedOffset();
  const size_t offset =
      (cyclic_offset != 0) ? cyclic_offset
                           : RoundUpTo(sizeof(AllocationHeader), kAlignment);

  const size_t allocated_size = kAlias + offset + payload_size;
  void* const allocated = (alloc_ptr != nullptr)
                              ? (*alloc_ptr)(opaque_ptr, allocated_size)
                              : malloc(allocated_size);
  if (allocated == nullptr) return nullptr;

  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
  // Advance by a full kAlias and round down: this always moves to the next
  // kAlias boundary strictly above `allocated`, even if it was already
  // aligned (the extra kAlias bytes were requested above).
  const uintptr_t bumped = reinterpret_cast<uintptr_t>(allocated) + kAlias;
  const uintptr_t aligned = bumped & ~(kAlias - 1);

  // `offset` is a multiple of kAlignment, so the payload stays aligned.
  const uintptr_t payload = aligned + offset;
  HWY_DASSERT(payload % kAlignment == 0);

  // Stash the original pointer and the size directly in front of the payload
  // so the release functions can recover them from the payload address alone.
  AllocationHeader* const header = reinterpret_cast<AllocationHeader*>(
      payload - sizeof(AllocationHeader));
  HWY_DASSERT(reinterpret_cast<uintptr_t>(header) >= aligned);
  header->payload_size = payload_size;
  header->allocated = allocated;

  return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kAlignment);
}
// Releases memory obtained from AllocateAlignedBytes.
//
// aligned_pointer: payload pointer previously returned by
//                  AllocateAlignedBytes; nullptr is accepted and ignored.
// free_ptr:        optional user deallocator; when null, free is used.
// opaque_ptr:      passed through unchanged as the first argument of free_ptr.
HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
                                    FreePtr free_ptr, void* opaque_ptr) {
  if (aligned_pointer == nullptr) return;

  const uintptr_t payload_addr = reinterpret_cast<uintptr_t>(aligned_pointer);
  HWY_DASSERT(payload_addr % kAlignment == 0);

  // The header sits immediately in front of the payload and holds the pointer
  // that the underlying allocator originally returned.
  const AllocationHeader* const hdr =
      reinterpret_cast<const AllocationHeader*>(payload_addr -
                                                sizeof(AllocationHeader));

  if (free_ptr != nullptr) {
    (*free_ptr)(opaque_ptr, hdr->allocated);
    return;
  }
  free(hdr->allocated);
}
// Runs an optional array deleter over the payload and then releases the
// underlying allocation.
//
// aligned_pointer: payload pointer previously returned by
//                  AllocateAlignedBytes; nullptr is accepted and ignored.
// free_ptr:        optional user deallocator; when null, free is used.
// opaque_ptr:      passed through unchanged as the first argument of free_ptr.
// deleter:         optional callback invoked with the payload pointer and the
//                  payload size in bytes recorded at allocation time, before
//                  the memory is released.
HWY_DLLEXPORT void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer,
                                                      FreePtr free_ptr,
                                                      void* opaque_ptr,
                                                      ArrayDeleter deleter) {
  if (aligned_pointer == nullptr) return;

  const uintptr_t payload_addr = reinterpret_cast<uintptr_t>(aligned_pointer);
  HWY_DASSERT(payload_addr % kAlignment == 0);

  // The header sits immediately in front of the payload.
  const AllocationHeader* const hdr =
      reinterpret_cast<const AllocationHeader*>(payload_addr -
                                                sizeof(AllocationHeader));

  // Give the caller a chance to tear down the payload contents while the
  // memory is still valid.
  if (deleter) {
    (*deleter)(aligned_pointer, hdr->payload_size);
  }

  if (free_ptr != nullptr) {
    (*free_ptr)(opaque_ptr, hdr->allocated);
    return;
  }
  free(hdr->allocated);
}
}