#include "models.h"
#include <cmath>
// Builds the forward graph for the gemma4uv image path and returns it.
//
// Stages, in order:
//   1. unfold the raw image into patch rows (im2col), then norm -> linear -> bias -> norm
//   2. add per-patch x and y position embeddings looked up from one shared table, then norm
//   3. weightless RMS norm followed by the multimodal input projection
//
// The final tensor (named "projected" via cb) is expanded into gf.
ggml_cgraph * clip_graph_gemma4uv::build() {
    ggml_tensor * image = build_inp_raw();

    // Epsilon for the three NORM_TYPE_NORMAL patch norms below.
    // The RMS norm in the last stage uses hparams.eps instead.
    const float norm_eps = 1e-5f;

    // ---- stage 1: patch embedding -------------------------------------------------------
    // The kernel tensor is a freshly created patch_size x patch_size x channels tensor that
    // is never filled here; presumably im2col only consults its shape -- TODO confirm.
    const int64_t n_channels  = image->ne[2];
    ggml_tensor * patch_shape = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, patch_size, patch_size, n_channels);

    // stride = patch_size in both directions, no padding, dilation 1, 2D mode
    ggml_tensor * embd = ggml_im2col(ctx0, patch_shape, image,
                                     patch_size, patch_size,
                                     0, 0,
                                     1, 1,
                                     true, image->type);

    // collapse every dimension above the first into a single row axis
    const int64_t row_len = embd->ne[0];
    const int64_t n_rows  = embd->ne[1] * embd->ne[2] * embd->ne[3];
    embd = ggml_reshape_2d(ctx0, embd, row_len, n_rows);

    embd = build_norm(embd, model.patch_norm_1_w, model.patch_norm_1_b, NORM_TYPE_NORMAL, norm_eps, -1);
    embd = ggml_mul_mat(ctx0, model.patch_embeddings_0, embd);
    embd = ggml_add(ctx0, embd, model.patch_bias);
    embd = build_norm(embd, model.patch_norm_2_w, model.patch_norm_2_b, NORM_TYPE_NORMAL, norm_eps, -1);

    // ---- stage 2: position embedding ----------------------------------------------------
    // Creates a named I32 graph input with one index per patch.
    auto make_pos_input = [&](const char * name) {
        ggml_tensor * idx = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_patches);
        ggml_set_name(idx, name);
        ggml_set_input(idx);
        return idx;
    };

    ggml_tensor * pos_x = make_pos_input("pos_x");
    ggml_tensor * pos_y = make_pos_input("pos_y");

    // model.position_embeddings is viewed as two tables of table_rows rows each:
    // the x table starts at byte offset 0, the y table right after it.
    const int64_t table_rows = model.position_embeddings->ne[1];
    const size_t  row_bytes  = ggml_row_size(model.position_embeddings->type, n_embd);

    ggml_tensor * table_x = ggml_view_2d(ctx0, model.position_embeddings,
                                         n_embd, table_rows, row_bytes, 0);
    ggml_tensor * table_y = ggml_view_2d(ctx0, model.position_embeddings,
                                         n_embd, table_rows, row_bytes, table_rows * row_bytes);

    ggml_tensor * pe_x = ggml_get_rows(ctx0, table_x, pos_x);
    ggml_tensor * pe_y = ggml_get_rows(ctx0, table_y, pos_y);

    // embd + pe_x first, then + pe_y
    embd = ggml_add(ctx0, ggml_add(ctx0, embd, pe_x), pe_y);
    cb(embd, "pos_embd", -1);

    embd = build_norm(embd, model.patch_norm_3_w, model.patch_norm_3_b, NORM_TYPE_NORMAL, norm_eps, -1);

    // ---- stage 3: projection ------------------------------------------------------------
    // RMS norm with no learned scale applied here, then the input projection matmul.
    ggml_tensor * cur = ggml_rms_norm(ctx0, embd, hparams.eps);
    cur = build_mm(model.mm_input_proj_w, cur);
    cb(cur, "projected", -1);

    ggml_build_forward_expand(gf, cur);
    return gf;
}