import numpy as np
from llama_cpp import Llama
model_path = "/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf"
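# Note: logits_all=True makes llama-cpp-python keep the logits for every
# evaluated position in llm._scores, not just the final one.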
llm = Llama(model_path=model_path, n_ctx=512, verbose=False, logits_all=True)
tokens = [28]  # token id 28 decodes to '=' in this model's vocabulary
llm.reset()
llm.eval(tokens)
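# llm._scores has shape (n_tokens, n_vocab); the last row is the logit vector
# predicting the token that follows '='.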
logits = np.array(llm._scores[-1])
print("=== llama-cpp-python single token '=' ===")
print(f"Logits: min={logits.min():.4f}, max={logits.max():.4f}")
print(f"Logits: mean={logits.mean():.4f}, std={logits.std():.4f}")
print(f"Sum: {logits.sum():.4f}")
top_idx = np.argsort(logits)[::-1][:10]
print("\nTop 10 predictions:")
for idx in top_idx:
    tok = llm.detokenize([int(idx)]).decode('utf-8', errors='replace')
    print(f"  Token {idx} ({repr(tok)}): {logits[idx]:.4f}")
print("\n=== Our implementation comparison ===")
print("Our logits: min=-15.46, max=9.82")
print("Our logits: mean=-2.50, std=2.94")
print("Our sum: (not computed)")
print("\n=== Analysis ===")
print("The logit scales are different but in a similar ballpark.")
print("More critically, our top predictions are completely wrong.")
print("\nllama-cpp top: ' ', '1', '2', '0', '3' (all sensible after '=')")
print("Our top: 100258, 48888, 33044... (seemingly random tokens)")
print("\n=== Decoding our top predicted tokens ===")
our_top = [100258, 48888, 33044, 119099, 28341]
for idx in our_top:
    try:
        tok = llm.detokenize([idx]).decode('utf-8', errors='replace')
        print(f"  Token {idx}: {repr(tok)}")
    except Exception:
        print(f"  Token {idx}: <decode error>")