#include <ceed.h>
#include <ceed/backend.h>
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <sys/stat.h>
// Fixed size constant (4096) named for path handling in this backend.
// NOTE(review): the buffers/loops that use it are outside this section — confirm
// whether it is meant to include the terminating NUL.
#define OCCA_PATH_MAX 4096
// Make sure __USE_GNU is defined before <dlfcn.h> is included below.
// NOTE(review): presumably needed so <dlfcn.h> exposes the GNU extensions used by
// CeedOklDladdr_Occa (e.g. dladdr) — confirm. __USE_GNU is a glibc-internal
// macro; the documented switch is _GNU_SOURCE, but that only takes effect when
// defined before the first system header, which is not the case at this point.
#ifndef __USE_GNU
#define __USE_GNU
#endif
#include <dlfcn.h>
#include "occa.h"
// Small convenience constants for the backend sources; their call sites are
// not visible in this section.
// NO_OFFSET: literal zero (presumably passed as an offset argument to OCCA
//            memory calls — TODO confirm).
#define NO_OFFSET 0
// TILE_SIZE: literal 32 (presumably a kernel tiling/work-group size — TODO confirm).
#define TILE_SIZE 32
// NO_PROPS: alias for occaDefault, a name expected to come from "occa.h".
#define NO_PROPS occaDefault
// Per-vector data kept by the OCCA backend.
// NOTE(review): the roles below are read off the h_/d_ naming only — confirm
// against the vector implementation.
typedef struct {
  CeedScalar *h_array;            // presumably the host-side view of the values
  CeedScalar *h_array_allocated;  // presumably the host buffer allocated by the backend itself
  occaMemory  d_array;            // OCCA memory handle (presumably the device copy)
} CeedVector_Occa;
// Number of slots in CeedElemRestriction_Occa::kRestrict.
#define CEED_OCCA_NUM_RESTRICTION_KERNELS 8

// Per-restriction data kept by the OCCA backend.
// NOTE(review): member meanings are inferred from their names — confirm
// against the restriction implementation.
typedef struct {
  bool       strided;     // flag; presumably set for strided (index-free) restrictions
  occaMemory d_indices;   // OCCA memory handle for the index array
  occaMemory d_toffsets;  // OCCA memory handle; presumably offsets for the transpose path
  occaMemory d_tindices;  // OCCA memory handle; presumably indices for the transpose path
  // One kernel handle per restriction variant; the array holds
  // CEED_OCCA_NUM_RESTRICTION_KERNELS entries.
  occaKernel kRestrict[CEED_OCCA_NUM_RESTRICTION_KERNELS];
} CeedElemRestriction_Occa;
// Per-basis data kept by the OCCA backend.
// NOTE(review): member meanings are inferred from their names — confirm
// against the basis implementation.
typedef struct {
  bool                ready;      // flag; presumably marks that the members below have been set up
  CeedElemRestriction er;         // element restriction associated with this basis
  // OCCA memory handles, named after the 1D basis tables
  occaMemory          qref1d;
  occaMemory          qweight1d;
  occaMemory          interp1d;
  occaMemory          grad1d;
  // OCCA memory handles; presumably scratch buffers
  occaMemory          tmp0;
  occaMemory          tmp1;
  // OCCA kernel handles, one per operation name
  occaKernel          kZero;
  occaKernel          kInterp;
  occaKernel          kGrad;
  occaKernel          kWeight;
} CeedBasis_Occa;
// Per-operator data kept by the OCCA backend.
// NOTE(review): member meanings are inferred from their names — confirm
// against the operator implementation.
typedef struct {
  CeedVector  *Evecs;     // array of vectors (presumably element-local "E" vectors)
  CeedScalar **Edata;     // array of scalar pointers (presumably raw data of the E vectors)
  CeedVector  *evecsin;   // array of vectors (presumably E vectors for input fields)
  CeedVector  *evecsout;  // array of vectors (presumably E vectors for output fields)
  CeedVector  *qvecsin;   // array of vectors (presumably quadrature-point inputs)
  CeedVector  *qvecsout;  // array of vectors (presumably quadrature-point outputs)
  CeedInt      numein;    // count; presumably the length of evecsin
  CeedInt      numeout;   // count; presumably the length of evecsout
} CeedOperator_Occa;
// Capacity of the iOf7 / oOf7 tables in CeedQFunction_Occa.
#define N_MAX_IDX 16

// Per-QFunction data kept by the OCCA backend.
// NOTE(review): member meanings are inferred from their names — confirm
// against the QFunction implementation.
typedef struct {
  bool          ready;              // flag; presumably marks that the kernel and buffers are built
  CeedInt       idx;                // presumably the number of input entries in use
  CeedInt       odx;                // presumably the number of output entries in use
  CeedInt       iOf7[N_MAX_IDX];    // input table with N_MAX_IDX slots (presumably offsets)
  CeedInt       oOf7[N_MAX_IDX];    // output table with N_MAX_IDX slots (presumably offsets)
  // Integer parameters; presumably problem sizes handed to the kernel
  int           nc;
  int           dim;
  int           nelem;
  int           elemsize;
  int           e;
  // OCCA memory handles
  occaMemory    o_indata;
  occaMemory    o_outdata;
  occaMemory    d_ctx;
  occaMemory    d_idx;
  occaMemory    d_odx;
  char         *oklPath;            // mutable string; presumably the path of the OKL source file
  const char   *qFunctionName;      // read-only string naming the QFunction
  occaKernel    kQFunctionApply;    // OCCA kernel handle for applying the QFunction
  CeedOperator  op;                 // operator associated with this QFunction
} CeedQFunction_Occa;
// Per-context data kept by the OCCA backend for a QFunction context.
// NOTE(review): roles are read off the h_ naming only — confirm against the
// context implementation.
typedef struct {
  CeedScalar *h_data;            // presumably the host-side view of the context data
  CeedScalar *h_data_allocated;  // presumably the host buffer allocated by the backend itself
} CeedQFunctionContext_Occa;
// Backend-wide data kept by the OCCA backend for a Ceed object.
// NOTE(review): member meanings are inferred from their names — confirm
// against the backend initialisation code.
typedef struct {
  occaDevice  device;          // OCCA device handle
  bool        ocl;             // flag; presumably true when the device runs in OpenCL mode
  char       *libceed_dir;     // string; presumably the libCEED installation/source directory
  char       *occa_cache_dir;  // string; presumably the OCCA kernel cache directory
} Ceed_Occa;
// Backend-internal entry points. Every function here returns int; CEED_INTERN
// is defined outside this header. Definitions live in other files.
// NOTE(review): the int is presumably a libCEED error code (0 on success) — confirm.

// OKL path helpers. The parameters of CeedOklPath_Occa are unnamed here: a Ceed,
// two read-only strings and a char** (presumably the output path — TODO confirm).
CEED_INTERN int CeedOklPath_Occa(const Ceed, const char *, const char *, char **);
CEED_INTERN int CeedOklDladdr_Occa(Ceed);

// Basis creation (tensor-product H1 and general H1) and application over elements.
CEED_INTERN int CeedBasisCreateTensorH1_Occa(CeedInt dim, CeedInt P1d, CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
CEED_INTERN int CeedBasisCreateH1_Occa(CeedElemTopology topo, CeedInt dim, CeedInt ndof, CeedInt nqpts, const CeedScalar *interp1d,
const CeedScalar *grad1d, const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
CEED_INTERN int CeedBasisApplyElems_Occa(CeedBasis basis, CeedInt Q, CeedTransposeMode tmode, CeedEvalMode emode, const CeedVector u, CeedVector v);

// Creation hooks for operators, QFunctions and QFunction contexts.
CEED_INTERN int CeedOperatorCreate_Occa(CeedOperator op);
CEED_INTERN int CeedQFunctionCreate_Occa(CeedQFunction qf);
CEED_INTERN int CeedQFunctionContextCreate_Occa(CeedQFunctionContext ctx);

// Creation hook for element restrictions; takes the index array together with
// its memory type and copy mode, plus orientation arrays.
CEED_INTERN int CeedElemRestrictionCreate_Occa(const CeedMemType mtype, const CeedCopyMode cmode, const CeedInt *indices, const bool *orients,
const CeedInt8 *curl_orients, const CeedElemRestriction res);

// Creation hook for vectors; n is presumably the vector length — confirm.
CEED_INTERN int CeedVectorCreate_Occa(CeedInt n, CeedVector vec);