#include "./kernel-defines.hpp"
const char *occa_elem_restriction_source = STRINGIFY_SOURCE(
@directive("#define PRINT_KERNEL_HASHES 0")
typedef CeedScalar *
QuadVector @dim(ELEMENT_SIZE, COMPONENT_COUNT, elementCount);
@kernel void applyRestriction(const CeedInt elementCount, const CeedInt *indices, CeedScalar *u, QuadVector v) {
@tile(TILE_SIZE, @outer, @inner) for (int element = 0; element < elementCount; ++element) {
@directive("#if PRINT_KERNEL_HASHES")
if (element == 0) {
printf("\n\napplyRestriction Kernel: " OKL_KERNEL_HASH "\n\n");
}
@directive("#endif")
@directive("#if USES_INDICES") for (int node = 0; node < ELEMENT_SIZE; ++node) {
const CeedInt index = indices[node + (element * ELEMENT_SIZE)];
for (int c = 0; c < COMPONENT_COUNT; ++c) {
v(node, c, element) = u[index + (c * UNSTRIDED_COMPONENT_STRIDE)];
}
}
@directive("#else") for (int node = 0; node < ELEMENT_SIZE; ++node) {
for (int c = 0; c < COMPONENT_COUNT; ++c) {
v(node, c, element) = u[(node * NODE_STRIDE) + (c * COMPONENT_STRIDE) + (element * ELEMENT_STRIDE)];
}
}
@directive("#endif")
}
}
@directive("#if USES_INDICES")
@kernel void applyRestrictionTranspose(const CeedInt elementCount, const CeedInt *quadIndices, const CeedInt *dofOffsets,
const CeedInt *dofIndices, const QuadVector u, CeedScalar *v) {
@tile(TILE_SIZE, @outer, @inner) for (int n = 0; n < NODE_COUNT; ++n) {
@directive("#if PRINT_KERNEL_HASHES")
if (n == 0) {
printf("\n\napplyRestrictionTranspose Kernel: " OKL_KERNEL_HASH "\n\n");
}
@directive("#endif")
CeedScalar vComp[COMPONENT_COUNT];
const CeedInt vIndex = quadIndices[n];
const CeedInt offsetStart = dofOffsets[n];
const CeedInt offsetEnd = dofOffsets[n + 1];
for (int c = 0; c < COMPONENT_COUNT; ++c) {
vComp[c] = 0;
}
for (CeedInt i = offsetStart; i < offsetEnd; ++i) {
const CeedInt index = dofIndices[i];
const int node = (index % ELEMENT_SIZE);
const int element = (index / ELEMENT_SIZE);
for (int c = 0; c < COMPONENT_COUNT; ++c) {
vComp[c] += u(node, c, element);
}
}
for (int c = 0; c < COMPONENT_COUNT; ++c) {
v[vIndex + (c * UNSTRIDED_COMPONENT_STRIDE)] += vComp[c];
}
}
}
@directive("#else")
@kernel void applyRestrictionTranspose(const CeedInt elementCount, const CeedInt *quadIndices, const CeedInt *dofOffsets,
const CeedInt *dofIndices, const QuadVector u, CeedScalar *v) {
@tile(TILE_SIZE, @outer, @inner) for (int element = 0; element < elementCount; ++element) {
@directive("#if PRINT_KERNEL_HASHES")
if (element == 0) {
printf("\n\napplyRestrictionTranspose Kernel: " OKL_KERNEL_HASH "\n\n");
}
@directive("#endif")
for (int node = 0; node < ELEMENT_SIZE; ++node) {
for (int c = 0; c < COMPONENT_COUNT; ++c) {
v[(node * NODE_STRIDE) + (c * COMPONENT_STRIDE) + (element * ELEMENT_STRIDE)] += u(node, c, element);
}
}
}
}
@directive("#endif")
);