import numpy as np
from llama_cpp import Llama
import struct
def read_gguf_embeddings():
    """Load a GGUF model via llama-cpp-python and print embedding vectors.

    Opens a local Qwen2.5-0.5B GGUF file in embedding mode, then for each
    hard-coded token id fetches its embedding and prints the vector's shape
    and first five components.

    Returns:
        None. Output goes to stdout.
    """
    model_path = "/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf"
    # n_ctx=4 is enough here since each embed call handles a single token.
    llm = Llama(model_path=model_path, n_ctx=4, embedding=True, verbose=False)
    tokens = [16, 28, 10]
    print("Embeddings via llama-cpp-python:")
    for tok in tokens:
        # Reset model state so one token's evaluation can't affect the next.
        llm.reset()
        # NOTE(review): Llama.embed documents str / list-of-str input; passing
        # a list of raw token ids may rely on version-specific behavior — confirm.
        emb = llm.embed([tok])
        if emb is not None:
            emb = np.array(emb[0])
            print(f" Token {tok}: shape={emb.shape}, first5={emb[:5]}")
# Script entry point: run the embedding dump only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    read_gguf_embeddings()