#ifndef HTP_OPNODE_H
#define HTP_OPNODE_H
#define GGML_COMMON_IMPL_CPP
#include "ggml-backend-impl.h"
#include "ggml-common.h"
#include <string>
#include <vector>
#include <stdio.h>
#include "htp-ops.h"
struct htp_opnode {
ggml_tensor * node = nullptr;
std::vector<ggml_tensor *> fused;
htp_op_code opcode = HTP_OP_INVALID;
ggml_op op() const {
return node->op;
}
const ggml_tensor * dst() const {
return fused.empty() ? node : fused.back();
}
const ggml_tensor * src0() const {
return node->src[0];
}
const ggml_tensor * src1() const {
return node->src[1];
}
bool is_empty() const {
return ggml_op_is_empty(node->op);
}
void add_fused(ggml_tensor * t) {
fused.push_back(t);
}
bool stackable() const {
switch (this->op()) {
case GGML_OP_MUL_MAT:
case GGML_OP_MUL_MAT_ID:
return ggml_is_quantized(this->src0()->type);
default:
return false;
}
}
bool same_input(const htp_opnode& n) const {
return n.src1() == this->src1();
}
std::vector<const ggml_tensor *> get_inputs() const {
if (fused.empty()) {
int last_non_null = -1;
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (node->src[i]) {
last_non_null = i;
}
}
std::vector<const ggml_tensor *> inputs(last_non_null + 1, nullptr);
for (int i = 0; i <= last_non_null; i++) {
inputs[i] = node->src[i];
}
return inputs;
}
std::vector<const ggml_tensor *> inputs(GGML_MAX_SRC, nullptr);
std::vector<const ggml_tensor *> outputs;
outputs.push_back(node);
for (const auto * f : fused) {
outputs.push_back(f);
}
auto contains = [&](const std::vector<const ggml_tensor *> & vec, const ggml_tensor * t) {
for (const auto * x : vec) {
if (x == t) return true;
}
return false;
};
int count = 0;
auto add_input = [&](const ggml_tensor * t) {
if (t && !contains(outputs, t) && !contains(inputs, t)) {
if (count < (int)inputs.size()) {
inputs[count++] = t;
} else {
inputs.push_back(t);
}
}
};
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (node->src[i]) {
add_input(node->src[i]);
}
}
for (const auto * f : fused) {
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (f->src[i]) {
add_input(f->src[i]);
}
}
}
inputs.resize(count);
return inputs;
}
std::string op_name() const {
if (fused.empty()) {
return ggml_op_desc(node);
}
std::string name = ggml_op_desc(node);
for (const auto * f : fused) {
name += "+";
name += ggml_op_desc(f);
}
return name;
}
};
struct htp_opformat {
char strides[64 * GGML_MAX_SRC];
char dims[64 * GGML_MAX_SRC];
char types[16 * GGML_MAX_SRC];
char buffs[64 * GGML_MAX_SRC];
char names[64 * GGML_MAX_SRC];
int format_tensor_dims(char * str, const struct ggml_tensor * t) {
if (!t) {
return sprintf(str, "NONE");
}
if (t->ne[2] == 1 && t->ne[3] == 1) {
return sprintf(str, "%d:%d", (int) t->ne[0], (int) t->ne[1]);
} else {
return sprintf(str, "%d:%d:%d:%d", (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
}
}
void format_op_dims(char * str, const htp_opnode & node) {
char * p = str;
auto inputs = node.get_inputs();
if (!inputs.empty()) {
p += format_tensor_dims(p, inputs[0]);
for (size_t i = 1; i < inputs.size(); i++) {
p += sprintf(p, " x ");
p += format_tensor_dims(p, inputs[i]);
}
p += sprintf(p, " -> ");
}
char self[64];
format_tensor_dims(self, node.dst());
p += sprintf(p, "%s", self);
}
int format_tensor_strides(char * str, const struct ggml_tensor * t) {
if (!t) {
return sprintf(str, "NONE");
}
const char * c = ggml_is_contiguous(t) ? "" : "!";
if (t->ne[2] == 1 && t->ne[3] == 1) {
return sprintf(str, "%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], c);
} else {
return sprintf(str, "%zu:%zu:%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], (size_t) t->nb[2], (size_t) t->nb[3], c);
}
}
void format_op_strides(char * str, const htp_opnode & node) {
char * p = str;
auto inputs = node.get_inputs();
if (!inputs.empty()) {
p += format_tensor_strides(p, inputs[0]);
for (size_t i = 1; i < inputs.size(); i++) {
p += sprintf(p, " x ");
p += format_tensor_strides(p, inputs[i]);
}
p += sprintf(p, " -> ");
}
char self[64];
format_tensor_strides(self, node.dst());
p += sprintf(p, "%s", self);
}
void format_op_types(char * str, const htp_opnode & node) {
char * p = str;
auto inputs = node.get_inputs();
if (!inputs.empty()) {
p += sprintf(p, "%s", inputs[0] ? ggml_type_name(inputs[0]->type) : "NONE");
for (size_t i = 1; i < inputs.size(); i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", inputs[i] ? ggml_type_name(inputs[i]->type) : "NONE");
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", ggml_type_name(node.dst()->type));
}
const char * tensor_buff_name(const struct ggml_tensor * t) {
if (t && t->buffer) {
return ggml_backend_buffer_name(t->buffer);
}
return "NONE";
}
void format_op_buffs(char * str, const htp_opnode & node) {
char * p = str;
auto inputs = node.get_inputs();
if (!inputs.empty()) {
p += sprintf(p, "%s", tensor_buff_name(inputs[0]));
for (size_t i = 1; i < inputs.size(); i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", tensor_buff_name(inputs[i]));
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", tensor_buff_name(node.dst()));
}
void format_op_names(char * str, const htp_opnode & node) {
char * p = str;
auto inputs = node.get_inputs();
if (!inputs.empty()) {
p += sprintf(p, "%s", inputs[0] ? inputs[0]->name : "NONE");
for (size_t i = 1; i < inputs.size(); i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", inputs[i] ? inputs[i]->name : "NONE");
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", node.dst()->name);
}
void format(const htp_opnode & node) {
format_op_dims(dims, node);
format_op_strides(strides, node);
format_op_types(types, node);
format_op_buffs(buffs, node);
format_op_names(names, node);
}
htp_opformat() {}
htp_opformat(const htp_opnode & node) { format(node); }
};
#endif