import numpy as np
from gguf import GGUFReader
import struct
model_path = "/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf"

# Row of token_embd.weight to dequantize (token id 28, printed below as '=').
TOKEN_ID = 28

# ggml Q5_0 block: 32 weights packed into 22 bytes
#   bytes  0..1   d   float16 scale (little-endian)
#   bytes  2..5   qh  32 bits, bit e = 5th (high) bit of element e
#   bytes  6..21  qs  16 bytes, low nibble of qs[i] = element i,
#                     high nibble of qs[i] = element i + 16
Q5_0_BLOCK_ELEMS = 32
Q5_0_BLOCK_BYTES = 22


def dequantize_q5_0_row(row):
    """Dequantize one row of raw Q5_0 bytes into a float64 array.

    Args:
        row: The packed bytes of a row, either a bytes-like object or a
            uint8 array. Its length must be a multiple of 22 (one Q5_0
            block).

    Returns:
        A 1-D float64 NumPy array with 32 values per input block, in
        element order.

    Raises:
        ValueError: If the row length is not a whole number of blocks.
    """
    if isinstance(row, (bytes, bytearray, memoryview)):
        raw = np.frombuffer(row, dtype=np.uint8)
    else:
        raw = np.ascontiguousarray(row, dtype=np.uint8).ravel()

    if raw.size % Q5_0_BLOCK_BYTES != 0:
        raise ValueError(
            f"row has {raw.size} bytes, which is not a multiple of the "
            f"{Q5_0_BLOCK_BYTES}-byte Q5_0 block size"
        )

    n_blocks = raw.size // Q5_0_BLOCK_BYTES
    blocks = raw.reshape(n_blocks, Q5_0_BLOCK_BYTES)

    # Slices of `blocks` are strided; copy so the bytes can be reinterpreted.
    scale = blocks[:, 0:2].copy().view("<f2").astype(np.float64)  # (n, 1)
    qh = blocks[:, 2:6].copy().view("<u4")                         # (n, 1)
    qs = blocks[:, 6:Q5_0_BLOCK_BYTES]                             # (n, 16)

    # Low nibbles hold elements 0..15, high nibbles hold elements 16..31.
    low_bits = np.concatenate([qs & 0x0F, qs >> 4], axis=1)        # (n, 32)
    bit_index = np.arange(Q5_0_BLOCK_ELEMS, dtype=np.uint32)
    high_bit = (qh >> bit_index) & 1                               # (n, 32)

    # Widen to a signed type before subtracting the offset: uint8 arithmetic
    # would wrap around for quantized values below 16.
    q = low_bits.astype(np.int32) | (high_bit.astype(np.int32) << 4)
    return (scale * (q - 16)).reshape(-1)


def main():
    """Print a reference dequantization of one token embedding row.

    Loads the GGUF file at ``model_path``, locates ``token_embd.weight``,
    dequantizes row ``TOKEN_ID`` and saves it under /tmp for comparison
    with the Rust implementation.
    """
    reader = GGUFReader(model_path)
    for tensor in reader.tensors:
        if tensor.name != 'token_embd.weight':
            continue

        print("=== token_embd.weight ===")
        print(f"Shape (GGUF ne): {tensor.shape}")
        # NOTE(review): the decoding below assumes this prints Q5_0 — confirm.
        print(f"Type: {tensor.tensor_type}")
        print(f"Data shape: {tensor.data.shape}")

        data = tensor.data
        print(f"\nRaw data shape: {data.shape}")

        # Assumes `data` is 2-D with one packed row per token — TODO confirm
        # against the installed gguf version.
        row = data[TOKEN_ID]
        print(f"Row {TOKEN_ID} has {len(row)} bytes")

        dequant_row = dequantize_q5_0_row(row)

        print(f"\nDequantized row {TOKEN_ID} (token '='):")
        print(f" Length: {len(dequant_row)}")
        print(f" First 5: {dequant_row[:5]}")
        print(f" Sum: {dequant_row.sum():.6f}")
        print(f" Min: {dequant_row.min():.6f}, Max: {dequant_row.max():.6f}")

        out_path = f'/tmp/py_embedding_{TOKEN_ID}.npy'
        np.save(out_path, dequant_row)
        print(f"\nSaved to {out_path}")

        print(f"\n=== Our Rust implementation produces for token {TOKEN_ID} ===")
        print("First 5: [0.0055, -0.0166, -0.0166, 0.0193, 0.0193]")
        print("Sum: -0.230461")
        print("These should match if dequantization is correct")
        break


if __name__ == "__main__":
    main()