import numpy as np
from llama_cpp import Llama
model_path = "/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf"
llm = Llama(
model_path=model_path,
n_ctx=512,
n_batch=512,
verbose=False,
)
tokens = llm.tokenize(b"=", add_bos=False)
print(f"Token for '=': {tokens}")
llm.reset()
llm.eval(tokens)
logits = llm.scores[0]
top_indices = np.argsort(logits)[::-1][:10]
print("\nTop 10 predictions for single token '=':")
for i, idx in enumerate(top_indices):
try:
tok_str = llm.detokenize([idx]).decode('utf-8', errors='replace')
except:
tok_str = f"<token {idx}>"
print(f" {i+1}. Token {idx} '{tok_str}': {logits[idx]:.4f}")
llm.reset()
tokens = llm.tokenize(b"1+1=", add_bos=False)
print(f"\nTokens for '1+1=': {tokens}")
llm.eval(tokens)
logits = llm.scores[len(tokens) - 1]
top_indices = np.argsort(logits)[::-1][:10]
print("\nTop 10 predictions for '1+1=':")
for i, idx in enumerate(top_indices):
try:
tok_str = llm.detokenize([idx]).decode('utf-8', errors='replace')
except:
tok_str = f"<token {idx}>"
print(f" {i+1}. Token {idx} '{tok_str}': {logits[idx]:.4f}")
tok_2 = llm.tokenize(b"2", add_bos=False)[0]
print(f"\nToken for '2': {tok_2}")
rank_2 = np.where(top_indices == tok_2)[0]
if len(rank_2) > 0:
print(f"Token '2' rank: {rank_2[0] + 1}")
else:
all_sorted = np.argsort(logits)[::-1]
rank = np.where(all_sorted == tok_2)[0][0] + 1
print(f"Token '2' rank: {rank}")
print(f"Token '2' logit: {logits[tok_2]:.4f}")
print(f"\nLogit stats: min={logits.min():.4f}, max={logits.max():.4f}, mean={logits.mean():.4f}, std={logits.std():.4f}")