#pragma once
#include "ggml.h"
#include "openvino/runtime/core.hpp"
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <openvino/core/node.hpp>
#include <openvino/runtime/remote_context.hpp>
#include <openvino/runtime/tensor.hpp>
#include <optional>
#include <string>
#include <utility>
// Identifiers for the quantization formats used by this backend's
// re-quantization path (see ggml_openvino_get_requant_type, which returns one
// of these wrapped in std::optional).
// NOTE(review): the exact meaning of each suffix (_C, _128, _32) is not
// visible in this header — presumably channel-wise / block-size variants;
// confirm against the implementation before relying on it.
enum class ExtraQuantType {
    F16,
    Q4_0_C,
    Q8_1_C,
    Q4_0_128,
    Q8_0_C,
    Q8_0_32,
};
// Returns a reference to an ov::Core instance.
// NOTE(review): the name suggests a single shared, process-wide instance —
// confirm in the implementation.
ov::Core & ov_singleton_core();
// Returns an ov::RemoteContext, or an empty optional when none is available.
// NOTE(review): presumably only set for devices that expose a remote context
// (e.g. GPU) — verify against the implementation.
std::optional<ov::RemoteContext> ggml_openvino_get_remote_context();
// Returns a reference to an ov::AnyMap of compile properties. The returned
// reference is const; its lifetime is owned by the implementation.
const ov::AnyMap & ggml_openvino_get_compile_config();
// Returns an OpenCL command queue handle.
// NOTE(review): may be null when no OpenCL queue exists (the matching
// ggml_openvino_device_config::cl_queue member defaults to nullptr) — confirm.
cl_command_queue ggml_openvino_get_cl_queue();
typedef cl_int(CL_API_CALL * clEnqueueMemFillINTEL_fn)(cl_command_queue queue,
void * dst_ptr,
const void * pattern,
size_t pattern_size,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
typedef cl_int(CL_API_CALL * clEnqueueMemcpyINTEL_fn)(cl_command_queue queue,
cl_bool blocking,
void * dst_ptr,
const void * src_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
// Returns a pointer to the clEnqueueMemFillINTEL extension function.
// NOTE(review): presumably null when the extension is unavailable — callers
// should check before invoking; confirm in the implementation.
clEnqueueMemFillINTEL_fn ggml_openvino_get_clEnqueueMemFillINTEL();
// Returns a pointer to the clEnqueueMemcpyINTEL extension function.
// NOTE(review): presumably null when the extension is unavailable — confirm.
clEnqueueMemcpyINTEL_fn ggml_openvino_get_clEnqueueMemcpyINTEL();
// Device-level configuration for the OpenVINO backend.
//
// Every field has a defined default: device "CPU", both flags false, no remote
// context, an empty compile config and a null OpenCL queue. init() and the
// destructor are defined out of line.
//
// NOTE(review): the struct holds a raw cl_command_queue and declares a
// destructor, yet copy/move operations are still implicitly generated. If the
// destructor releases the queue, copying an instance would be unsafe — confirm
// instances are only ever accessed through ggml_openvino_get_device_config().
struct ggml_openvino_device_config {
    // Name of the OpenVINO device in use.
    std::string device_name{ "CPU" };
    // NOTE(review): presumably true when device_name selects an NPU — confirm.
    bool is_npu{ false };
    // NOTE(review): presumably set once init() has completed — confirm.
    bool initialized{ false };
    // Remote context, when one exists for the device.
    std::optional<ov::RemoteContext> remote_context;
    // Properties map; exposed read-only via ggml_openvino_get_compile_config().
    ov::AnyMap compile_config;
    // OpenCL queue handle; null until something assigns it.
    cl_command_queue cl_queue{ nullptr };

    void init();
    ~ggml_openvino_device_config();
};
// Returns a mutable reference to a ggml_openvino_device_config.
// NOTE(review): presumably a single shared instance — confirm.
ggml_openvino_device_config & ggml_openvino_get_device_config();
// Initializes the device configuration.
// NOTE(review): presumably forwards to ggml_openvino_device_config::init() and
// is safe to call more than once — confirm in the implementation.
void ggml_openvino_init_device_config();
// Returns the configured device name by const reference.
const std::string & ggml_openvino_get_device_name();
// Reports whether the configured device is an NPU.
bool ggml_openvino_is_npu();
// Returns the ExtraQuantType chosen for `tensor`, or an empty optional.
// `no_requant` defaults to false.
// NOTE(review): presumably an empty optional means "no re-quantization" and
// no_requant=true forces that result — verify against the implementation.
std::optional<ExtraQuantType> ggml_openvino_get_requant_type(const ggml_tensor * tensor, bool no_requant = false);
// Polymorphic base for the backend's per-tensor "extra" records.
//
// The `type` tag lets code holding a base pointer tell which concrete record
// it has. The constructor is protected, so the base cannot be instantiated on
// its own; the virtual destructor makes deletion through a base pointer safe.
struct ggml_openvino_extra_base {
    // Discriminator for the concrete record kind.
    enum class Type {
        WEIGHT,
        QUANTIZED_WEIGHT,
        TENSOR,
    };

    Type type;

    virtual ~ggml_openvino_extra_base() = default;

  protected:
    // Derived records pass their own tag here.
    explicit ggml_openvino_extra_base(Type kind) : type{ kind } {}
};
struct ggml_openvino_weight_extra : public ggml_openvino_extra_base {
ov::Tensor weights; std::shared_ptr<ov::Node> weight_node;
ggml_openvino_weight_extra(ov::Tensor w, std::shared_ptr<ov::Node> n) :
ggml_openvino_extra_base(Type::WEIGHT),
weights(std::move(w)),
weight_node(std::move(n)) {}
};
struct ggml_openvino_quantized_weight_extra : public ggml_openvino_extra_base {
ov::Tensor weights; ov::Tensor scales; ov::Tensor zp; std::shared_ptr<ov::Node> weight_node;
ggml_openvino_quantized_weight_extra(ov::Tensor w, ov::Tensor s, ov::Tensor z, std::shared_ptr<ov::Node> n) :
ggml_openvino_extra_base(Type::QUANTIZED_WEIGHT),
weights(std::move(w)),
scales(std::move(s)),
zp(std::move(z)),
weight_node(std::move(n)) {}
};
// Extra record tagged Type::TENSOR: shares ownership of a single ov::Tensor.
struct ggml_openvino_tensor_extra : public ggml_openvino_extra_base {
    std::shared_ptr<ov::Tensor> tensor;

    // Takes the shared pointer by value and moves it into `tensor`.
    explicit ggml_openvino_tensor_extra(std::shared_ptr<ov::Tensor> shared_tensor) :
        ggml_openvino_extra_base(Type::TENSOR),
        tensor(std::move(shared_tensor)) {}
};
// Describes how the weights / scales / zp parts of an extracted tensor are laid
// out, as returned by ggml_openvino_get_extracted_layout().
//
// Every field carries a default so a default-initialized instance is fully
// defined. The defaults equal what value-initialization (`= {}`) produces, so
// existing callers observe no change.
//
// NOTE(review): offsets and sizes are presumably in bytes relative to the
// start of the tensor's buffer — confirm against the implementation.
struct ggml_openvino_extracted_layout {
    size_t total_size     = 0;
    size_t weights_offset = 0;
    size_t weights_size   = 0;
    size_t scales_offset  = 0;
    size_t scales_size    = 0;
    size_t zp_offset      = 0;
    size_t zp_size        = 0;

    // NOTE(review): presumably true for 4-bit unsigned weights — confirm.
    bool is_u4 = false;
    // NOTE(review): presumably the number of weights per quantization block — confirm.
    int64_t weights_per_block = 0;
    // NOTE(review): presumably true when quantization is symmetric — confirm.
    bool is_symmetric = false;

    // NOTE(review): presumably requant_type is only meaningful when is_requant is true — confirm.
    bool                          is_requant = false;
    std::optional<ExtraQuantType> requant_type;
};
// Computes and returns (by value) the extracted layout for `tensor`.
// `use_bias` defaults to false.
// NOTE(review): the effect of use_bias on the layout is not visible in this
// header — confirm in the implementation.
ggml_openvino_extracted_layout ggml_openvino_get_extracted_layout(const ggml_tensor * tensor, bool use_bias = false);
// Creates a ggml_openvino_tensor_extra for `tensor` and returns a raw pointer.
// NOTE(review): ownership of the returned pointer is not expressed by the
// type; presumably it is handed to ggml_openvino_buffer_register_extra —
// confirm. The meaning of `is_remote` is likewise defined by the implementation.
ggml_openvino_tensor_extra * ggml_openvino_create_tensor_extra(const ggml_tensor * tensor, bool is_remote);
// Registers `extra` for `tensor`.
// NOTE(review): presumably the buffer takes ownership of `extra` and attaches
// it to the tensor — confirm before freeing it on the caller side.
void ggml_openvino_buffer_register_extra(ggml_tensor * tensor, ggml_openvino_extra_base * extra);
// Backend context record for the OpenVINO backend.
//
// A default-constructed context has device index 0, name "OpenVINO",
// description "OpenVINO Backend Context" and a null runtime_context.
struct ggml_backend_openvino_context {
    int         device{ 0 };
    std::string name{ "OpenVINO" };
    std::string description{ "OpenVINO Backend Context" };

    // Type-erased, shared handle; null by default.
    // NOTE(review): what object this points to is decided by the
    // implementation and is not visible in this header.
    std::shared_ptr<void> runtime_context{ nullptr };

    ggml_backend_openvino_context() = default;
};