#include "ceed-occa-cpu-operator.hpp"
#include "ceed-occa-elem-restriction.hpp"
#include "ceed-occa-qfunction.hpp"
#include "ceed-occa-qfunctioncontext.hpp"
#include "ceed-occa-simplex-basis.hpp"
#include "ceed-occa-tensor-basis.hpp"
#define CEED_OCCA_PRINT_KERNEL_HASHES 0
namespace ceed {
namespace occa {
CpuOperator::CpuOperator() {}
CpuOperator::~CpuOperator() {}
void CpuOperator::setupVectors() {
setupVectors(args.inputCount(), args.opInputs, args.qfInputs, dofInputs);
setupVectors(args.outputCount(), args.opOutputs, args.qfOutputs, dofOutputs);
}
void CpuOperator::setupVectors(const int fieldCount, OperatorFieldVector &opFields, QFunctionFieldVector &qfFields, VectorVector &vectors) {
for (int i = 0; i < fieldCount; ++i) {
const QFunctionField &qfField = qfFields[i];
const OperatorField &opField = opFields[i];
if (qfField.evalMode == CEED_EVAL_WEIGHT) {
vectors.push_back(NULL);
continue;
}
int entries;
if (qfField.evalMode == CEED_EVAL_NONE) {
entries = (ceedElementCount * ceedQ * qfField.size);
} else {
entries = (ceedElementCount * opField.getElementSize() * opField.getComponentCount());
}
Vector *dofVector = new Vector();
dofVector->ceed = ceed;
dofVector->resize(entries);
vectors.push_back(dofVector);
}
}
void CpuOperator::freeVectors() {
for (int i = 0; i < args.inputCount(); ++i) {
delete dofInputs[i];
}
for (int i = 0; i < args.outputCount(); ++i) {
delete dofOutputs[i];
}
dofInputs.clear();
dofOutputs.clear();
}
void CpuOperator::setupInputs(Vector *in) {
for (int i = 0; i < args.inputCount(); ++i) {
if (args.getInputEvalMode(i) == CEED_EVAL_WEIGHT) {
continue;
}
const OperatorField &opField = args.getOpInput(i);
Vector *input = opField.usesActiveVector() ? in : opField.vec;
Vector *output = dofInputs[i];
opField.elemRestriction->apply(CEED_NOTRANSPOSE, *input, *output);
}
}
void CpuOperator::setupOutputs(Vector *out) {
for (int i = 0; i < args.outputCount(); ++i) {
if (args.getOutputEvalMode(i) == CEED_EVAL_WEIGHT) {
continue;
}
const OperatorField &opField = args.getOpOutput(i);
Vector *input = dofOutputs[i];
Vector *output = opField.usesActiveVector() ? out : opField.vec;
opField.elemRestriction->apply(CEED_TRANSPOSE, *input, *output);
}
}
void CpuOperator::applyQFunction() {
if (qfunction->qFunctionContext) {
QFunctionContext *ctx = QFunctionContext::from(qfunction->qFunctionContext);
applyAddKernel.pushArg(ctx->getKernelArg());
} else {
applyAddKernel.pushArg(::occa::null);
}
applyAddKernel.pushArg(ceedElementCount);
for (int i = 0; i < args.inputCount(); ++i) {
const bool isInput = true;
pushKernelArgs(dofInputs[i], isInput, i);
}
for (int i = 0; i < args.outputCount(); ++i) {
const bool isInput = false;
pushKernelArgs(dofOutputs[i], isInput, i);
}
applyAddKernel.run();
}
void CpuOperator::pushKernelArgs(Vector *vec, const bool isInput, const int index) {
const OperatorField &opField = args.getOpField(isInput, index);
const QFunctionField &qfField = args.getQfField(isInput, index);
if (opField.hasBasis()) {
if (opField.usingTensorBasis()) {
pushTensorBasisKernelArgs(qfField, *((TensorBasis *)opField.basis));
} else {
pushSimplexBasisKernelArgs(qfField, *((SimplexBasis *)opField.basis));
}
}
if (vec) {
if (isInput) {
applyAddKernel.pushArg(vec->getConstKernelArg());
} else {
applyAddKernel.pushArg(vec->getKernelArg());
}
} else {
applyAddKernel.pushArg(::occa::null);
}
}
void CpuOperator::pushTensorBasisKernelArgs(const QFunctionField &qfField, TensorBasis &basis) {
switch (qfField.evalMode) {
case CEED_EVAL_INTERP: {
applyAddKernel.pushArg(basis.interp1D);
break;
}
case CEED_EVAL_GRAD: {
applyAddKernel.pushArg(basis.interp1D);
applyAddKernel.pushArg(basis.grad1D);
break;
}
case CEED_EVAL_WEIGHT: {
applyAddKernel.pushArg(basis.qWeight1D);
break;
}
default: {
}
}
}
void CpuOperator::pushSimplexBasisKernelArgs(const QFunctionField &qfField, SimplexBasis &basis) {
switch (qfField.evalMode) {
case CEED_EVAL_INTERP: {
applyAddKernel.pushArg(basis.interp);
break;
}
case CEED_EVAL_GRAD: {
applyAddKernel.pushArg(basis.grad);
break;
}
case CEED_EVAL_WEIGHT: {
applyAddKernel.pushArg(basis.qWeight);
break;
}
default: {
}
}
}
::occa::properties CpuOperator::getKernelProps() {
::occa::properties props = qfunction->getKernelProps(ceedQ);
props["defines/OCCA_Q"] = ceedQ;
return props;
}
void CpuOperator::applyAdd(Vector *in, Vector *out) {
setupVectors();
setupInputs(in);
applyQFunction();
setupOutputs(out);
freeVectors();
}
::occa::kernel CpuOperator::buildApplyAddKernel() {
std::stringstream ss;
addBasisFunctionSource(ss);
addKernelSource(ss);
const std::string kernelSource = ss.str();
CeedDebug(ceed, kernelSource.c_str());
return getDevice().buildKernelFromString(kernelSource, "applyAdd", getKernelProps());
}
void CpuOperator::addBasisFunctionSource(std::stringstream &ss) {
BasisVector sourceBasis;
for (int i = 0; i < args.inputCount(); ++i) {
addBasisIfMissingSource(sourceBasis, args.getOpInput(i).basis);
}
for (int i = 0; i < args.outputCount(); ++i) {
addBasisIfMissingSource(sourceBasis, args.getOpOutput(i).basis);
}
ss << std::endl;
const int basisCount = (int)sourceBasis.size();
for (int i = 0; i < basisCount; ++i) {
Basis &basis = *(sourceBasis[i]);
ss << "// Code generation for basis " << i + 1 << std::endl << "//---[ START ]-------------------------------" << std::endl;
if (basis.isTensorBasis()) {
TensorBasis &basisTensor = (TensorBasis &)basis;
ss << "#undef TENSOR_FUNCTION" << std::endl
<< "#undef P1D" << std::endl
<< "#undef Q1D" << std::endl
<< "#define P1D " << basisTensor.P1D << std::endl
<< "#define Q1D " << basisTensor.Q1D << std::endl;
} else {
SimplexBasis &basisSimplex = (SimplexBasis &)basis;
ss << "#undef SIMPLEX_FUNCTION" << std::endl
<< "#undef DIM" << std::endl
<< "#undef P" << std::endl
<< "#undef Q" << std::endl
<< "#define DIM " << basisSimplex.dim << std::endl
<< "#define P " << basisSimplex.P << std::endl
<< "#define Q " << basisSimplex.Q << std::endl;
}
ss << std::endl << basis.getFunctionSource() << std::endl << "//---[ END ]---------------------------------" << std::endl;
}
}
void CpuOperator::addBasisIfMissingSource(BasisVector &sourceBasis, Basis *basis) {
if (!basis) {
return;
}
const int existingBasisCount = (int)sourceBasis.size();
for (int i = 0; i < existingBasisCount; ++i) {
Basis *other = sourceBasis[i];
if (basis->isTensorBasis() != other->isTensorBasis()) {
continue;
}
if (basis->dim == other->dim && basis->P == other->P && basis->Q == other->Q) {
return;
}
}
sourceBasis.push_back(basis);
}
void CpuOperator::addKernelSource(std::stringstream &ss) {
ss << std::endl;
ss << "@kernel void applyAdd(" << std::endl;
addKernelArgsSource(ss);
ss << std::endl
<< ") {" << std::endl
<< " @tile(128, @outer, @inner)" << std::endl
<< " for (int element = 0; element < elementCount; ++element) {" << std::endl;
#if CEED_OCCA_PRINT_KERNEL_HASHES
ss << " if (element == 0) {" << std::endl
<< " printf(\"\\n\\nOperator Kernel: \" OKL_KERNEL_HASH \"\\n\\n\");" << std::endl
<< " }" << std::endl;
#endif
addQuadArraySource(ss);
ss << std::endl << " // [Start] Transforming inputs to quadrature points" << std::endl;
addInputSetupSource(ss);
ss << " // [End] Transforming inputs to quadrature points" << std::endl << std::endl;
addQFunctionApplicationSource(ss);
ss << std::endl << " // [Start] Transforming outputs to quadrature points" << std::endl;
addOutputSetupSource(ss);
ss << " // [End] Transforming outputs to quadrature points" << std::endl;
ss << " }" << std::endl << "}" << std::endl;
}
void CpuOperator::addKernelArgsSource(std::stringstream &ss) {
ss << " void *ctx," << std::endl << " const CeedInt elementCount";
for (int i = 0; i < args.inputCount(); ++i) {
const bool isInput = true;
addKernelArgSource(ss, isInput, i);
}
for (int i = 0; i < args.outputCount(); ++i) {
const bool isInput = false;
addKernelArgSource(ss, isInput, i);
}
}
void CpuOperator::addKernelArgSource(std::stringstream &ss, const bool isInput, const int index) {
const OperatorField &opField = args.getOpField(isInput, index);
const QFunctionField &qfField = args.getQfField(isInput, index);
std::stringstream dimAttribute;
if (opField.hasBasis()) {
ss << ',' << std::endl;
if (opField.usingTensorBasis()) {
addTensorKernelArgSource(ss, isInput, index, opField, qfField, dimAttribute);
} else {
addSimplexKernelArgSource(ss, isInput, index, opField, qfField, dimAttribute);
}
}
ss << ',' << std::endl;
if (isInput) {
ss << " const CeedScalar *" << dofInputVar(index) << dimAttribute.str();
} else {
ss << " CeedScalar *" << dofOutputVar(index) << dimAttribute.str();
}
}
void CpuOperator::addTensorKernelArgSource(std::stringstream &ss, const bool isInput, const int index, const OperatorField &opField,
const QFunctionField &qfField, std::stringstream &dimAttribute) {
TensorBasis &basis = *((TensorBasis *)opField.basis);
dimAttribute << " @dim(";
if (qfField.evalMode == CEED_EVAL_INTERP) {
ss << " const CeedScalar *" << interpVar(isInput, index);
for (int i = 0; i < basis.dim; ++i) {
dimAttribute << basis.P1D << ", ";
}
dimAttribute << basis.ceedComponentCount << ", elementCount";
} else if (qfField.evalMode == CEED_EVAL_GRAD) {
ss << " const CeedScalar *" << interpVar(isInput, index) << ',' << std::endl << " const CeedScalar *" << gradVar(isInput, index);
for (int i = 0; i < basis.dim; ++i) {
dimAttribute << basis.P1D << ", ";
}
dimAttribute << basis.ceedComponentCount << ", elementCount";
} else if (qfField.evalMode == CEED_EVAL_WEIGHT) {
ss << " const CeedScalar *" << qWeightVar(isInput, index);
for (int i = 0; i < basis.dim; ++i) {
dimAttribute << basis.Q1D << ", ";
}
dimAttribute << "elementCount";
} else {
dimAttribute.str("");
return;
}
dimAttribute << ")";
}
void CpuOperator::addSimplexKernelArgSource(std::stringstream &ss, const bool isInput, const int index, const OperatorField &opField,
const QFunctionField &qfField, std::stringstream &dimAttribute) {
SimplexBasis &basis = *((SimplexBasis *)opField.basis);
dimAttribute << " @dim(";
if (qfField.evalMode == CEED_EVAL_INTERP) {
ss << " const CeedScalar *" << interpVar(isInput, index);
dimAttribute << basis.P << ", " << basis.ceedComponentCount << ", elementCount";
} else if (qfField.evalMode == CEED_EVAL_GRAD) {
ss << " const CeedScalar *" << gradVar(isInput, index);
dimAttribute << basis.P << ", " << basis.ceedComponentCount << ", elementCount";
} else if (qfField.evalMode == CEED_EVAL_WEIGHT) {
ss << " const CeedScalar *" << qWeightVar(isInput, index);
dimAttribute << basis.Q << ", "
<< "elementCount";
} else {
dimAttribute.str("");
return;
}
dimAttribute << ")";
}
void CpuOperator::addQuadArraySource(std::stringstream &ss) {
const int inputs = args.inputCount();
const int outputs = args.outputCount();
const std::string quadInput = "quadInput";
const std::string quadOutput = "quadOutput";
ss << " // Store the transformed input quad values" << std::endl;
for (int i = 0; i < inputs; ++i) {
const bool isInput = true;
addSingleQfunctionQuadArraySource(ss, isInput, i, quadInput);
}
ss << std::endl << " // Store the transformed output quad values" << std::endl;
for (int i = 0; i < outputs; ++i) {
const bool isInput = false;
addSingleQfunctionQuadArraySource(ss, isInput, i, quadOutput);
}
ss << std::endl;
ss << std::endl << " // Store all input pointers in a single array" << std::endl;
addQfunctionQuadArraySource(ss, true, inputs, quadInput);
ss << std::endl << " // Store all output pointers in a single array" << std::endl;
addQfunctionQuadArraySource(ss, false, outputs, quadOutput);
ss << std::endl;
}
void CpuOperator::addSingleQfunctionQuadArraySource(std::stringstream &ss, const bool isInput, const int index, const std::string &name) {
const OperatorField &opField = args.getOpField(isInput, index);
CeedEvalMode evalMode = args.getEvalMode(isInput, index);
if (evalMode == CEED_EVAL_GRAD) {
ss << " CeedScalar " << indexedVar(name, index) << "[" << opField.getDim() << "]"
<< "[" << opField.getComponentCount() << "]"
<< "[OCCA_Q];" << std::endl;
} else if (evalMode == CEED_EVAL_INTERP) {
ss << " CeedScalar " << indexedVar(name, index) << "[" << opField.getComponentCount() << "]"
<< "[OCCA_Q];" << std::endl;
} else {
const QFunctionField &qfField = args.getQfField(isInput, index);
ss << " CeedScalar " << indexedVar(name, index) << "[OCCA_Q * " << qfField.size << "];" << std::endl;
}
}
void CpuOperator::addQfunctionQuadArraySource(std::stringstream &ss, const bool isInput, const int count, const std::string &name) {
const std::string arrayName = name + "s";
ss << " CeedScalar *" << arrayName << "[" << count << "] = {" << std::endl;
for (int i = 0; i < count; ++i) {
if (i) {
ss << ',' << std::endl;
}
ss << " (CeedScalar*) " << indexedVar(name, i);
}
ss << std::endl << " };" << std::endl;
}
void CpuOperator::addInputSetupSource(std::stringstream &ss) {
const bool isInput = true;
addBasisApplySource(ss, isInput, args.inputCount());
}
void CpuOperator::addOutputSetupSource(std::stringstream &ss) {
const bool isInput = false;
addBasisApplySource(ss, isInput, args.outputCount());
}
void CpuOperator::addBasisApplySource(std::stringstream &ss, const bool isInput, const int count) {
for (int i = 0; i < count; ++i) {
CeedEvalMode evalMode = args.getEvalMode(isInput, i);
if (evalMode == CEED_EVAL_INTERP) {
addInterpSource(ss, isInput, i);
} else if (evalMode == CEED_EVAL_GRAD) {
const bool hasTensorBasis = args.getOpField(isInput, i).usingTensorBasis();
if (hasTensorBasis) {
addGradTensorSource(ss, isInput, i);
} else {
addGradSimplexSource(ss, isInput, i);
}
} else if (evalMode == CEED_EVAL_WEIGHT) {
addWeightSource(ss, isInput, i);
} else if (evalMode == CEED_EVAL_NONE) {
addCopySource(ss, isInput, i);
}
}
}
void CpuOperator::addInterpSource(std::stringstream &ss, const bool isInput, const int index) {
const OperatorField &opField = args.getOpField(isInput, index);
const bool usingTensorBasis = opField.usingTensorBasis();
const int components = opField.getComponentCount();
const int dim = opField.getDim();
const std::string weights = interpVar(isInput, index);
std::string dimArgs;
if (usingTensorBasis) {
for (int i = 0; i < dim; ++i) {
if (i) {
dimArgs += ", ";
}
dimArgs += '0';
}
} else {
dimArgs = "0";
}
std::string input, output;
if (isInput) {
input = "&" + dofInputVar(index) + "(" + dimArgs + ", component, element)";
output = "(CeedScalar*) " + indexedVar("quadInput", index) + "[component]";
} else {
input = "(CeedScalar*) " + indexedVar("quadOutput", index) + "[component]";
output = "&" + dofOutputVar(index) + "(" + dimArgs + ", component, element)";
}
ss << " // Applying interp (" << xputName(isInput) << ": " << index << ")" << std::endl
<< " for (int component = 0; component < " << components << "; ++component) {" << std::endl
<< " " << elementFunction(isInput, index) << "(" << std::endl
<< " " << weights << ',' << std::endl
<< " " << input << ',' << std::endl
<< " " << output << std::endl
<< " );" << std::endl
<< " }" << std::endl
<< std::endl;
}
void CpuOperator::addGradTensorSource(std::stringstream &ss, const bool isInput, const int index) {
const OperatorField &opField = args.getOpField(isInput, index);
const int components = opField.getComponentCount();
const int dim = opField.getDim();
const std::string B = interpVar(isInput, index);
const std::string Bx = gradVar(isInput, index);
std::string dimArgs;
for (int i = 0; i < dim; ++i) {
if (i) {
dimArgs += ", ";
}
dimArgs += '0';
}
std::string inputs, outputs;
if (isInput) {
inputs = "&" + dofInputVar(index) + "(" + dimArgs + ", component, element)";
for (int i = 0; i < dim; ++i) {
if (i) {
outputs += ",\n ";
}
const std::string iStr = std::to_string(i);
outputs += "(CeedScalar*) " + indexedVar("quadInput", index) + "[" + iStr + "][component]";
}
} else {
for (int i = 0; i < dim; ++i) {
if (i) {
inputs += ",\n ";
}
const std::string iStr = std::to_string(i);
inputs += "(CeedScalar*) " + indexedVar("quadOutput", index) + "[" + iStr + "][component]";
}
outputs = "&" + dofOutputVar(index) + "(" + dimArgs + ", component, element)";
}
ss << " // Applying grad-tensor (" << xputName(isInput) << ": " << index << ")" << std::endl
<< " for (int component = 0; component < " << components << "; ++component) {" << std::endl
<< " " << elementFunction(isInput, index) << "(" << std::endl
<< " " << B << ',' << std::endl
<< " " << Bx << ',' << std::endl
<< " " << inputs << ',' << std::endl
<< " " << outputs << std::endl
<< " );" << std::endl
<< " }" << std::endl
<< std::endl;
}
void CpuOperator::addGradSimplexSource(std::stringstream &ss, const bool isInput, const int index) {
const int components = (args.getOpField(isInput, index).getComponentCount());
const std::string weights = gradVar(isInput, index);
std::string input, output;
if (isInput) {
input = "&" + dofInputVar(index) + "(0, component, element)";
output = "(CeedScalar*) " + indexedVar("quadInput", index) + "[component]";
} else {
input = "(CeedScalar*) " + indexedVar("quadOutput", index) + "[component]";
output = "&" + dofOutputVar(index) + "(0, component, element)";
}
ss << " // Applying grad-simplex (" << xputName(isInput) << ": " << index << ")" << std::endl
<< " for (int component = 0; component < " << components << "; ++component) {" << std::endl
<< " " << elementFunction(isInput, index) << "(" << std::endl
<< " " << weights << ',' << std::endl
<< " " << input << ',' << std::endl
<< " " << output << std::endl
<< " );" << std::endl
<< " }" << std::endl
<< std::endl;
}
void CpuOperator::addWeightSource(std::stringstream &ss, const bool isInput, const int index) {
const std::string weights = qWeightVar(isInput, index);
std::string output;
if (isInput) {
output = "(CeedScalar*) " + indexedVar("quadInput", index);
} else {
output = "&" + dofOutputVar(index) + "(0, element)";
}
ss << " // Applying weight (" << xputName(isInput) << ": " << index << ")" << std::endl
<< " " << elementFunction(isInput, index) << "(" << std::endl
<< " " << weights << ',' << std::endl
<< " " << output << std::endl
<< " );" << std::endl
<< std::endl;
}
void CpuOperator::addCopySource(std::stringstream &ss, const bool isInput, const int index) {
const QFunctionField &qfField = args.getQfField(isInput, index);
const std::string size = std::to_string(qfField.size);
std::string input, output;
if (isInput) {
input += dofInputVar(index) + "[q + (OCCA_Q * (field + element * " + size + "))]";
output += indexedVar("quadInput", index) + "[q + field * OCCA_Q]";
} else {
input = indexedVar("quadOutput", index) + "[q + field * OCCA_Q]";
output = dofOutputVar(index) + "[q + (OCCA_Q * (field + element * " + size + "))]";
}
ss << " // Copying source directly (" << xputName(isInput) << ": " << index << ")" << std::endl
<< " for (int field = 0; field < " << size << "; ++field) {" << std::endl
<< " for (int q = 0; q < OCCA_Q; ++q) {" << std::endl
<< " " << output << " = " << input << ";" << std::endl
<< " }" << std::endl
<< " }" << std::endl
<< std::endl;
}
void CpuOperator::addQFunctionApplicationSource(std::stringstream &ss) {
ss << " // Apply qFunction" << std::endl
<< " " << qfunction->qFunctionName << "(ctx, OCCA_Q, quadInputs, quadOutputs);" << std::endl
<< std::endl;
}
std::string CpuOperator::elementFunction(const bool isInput, const int index) {
return fullFieldFunctionName(isInput, args.getOpField(isInput, index), args.getQfField(isInput, index));
}
std::string CpuOperator::fieldFunctionName(const QFunctionField &qfField) {
switch (qfField.evalMode) {
case CEED_EVAL_INTERP:
return "interp";
case CEED_EVAL_GRAD:
return "grad";
case CEED_EVAL_WEIGHT:
return "weight";
default:
return "none";
}
}
std::string CpuOperator::fullFieldFunctionName(const bool isInput, const OperatorField &opField, const QFunctionField &qfField) {
const bool usingTensorBasis = opField.usingTensorBasis();
std::stringstream ss;
int dim, Q, P;
if (usingTensorBasis) {
TensorBasis &basis = *((TensorBasis *)opField.basis);
dim = basis.dim;
Q = basis.Q1D;
P = basis.P1D;
ss << "tensor_";
} else {
SimplexBasis &basis = *((SimplexBasis *)opField.basis);
dim = basis.dim;
Q = basis.Q;
P = basis.P;
ss << "simplex_";
}
ss << dim << "d_" << fieldFunctionName(qfField) << "Element";
if (!isInput) {
ss << "Transpose";
}
ss << "_Q" << Q << "_P" << P;
return ss.str();
}
} }