import numpy as np
from gguf import GGUFReader
# Qwen2.5-0.5B-Instruct GGUF (Q4_K_M file).  The output.weight tensor this
# script inspects is packed as Q8_0: consecutive 34-byte blocks, each a
# little-endian fp16 scale followed by 32 signed 8-bit quants.
model_path = "/home/joseph/Models/qwen2.5-0.5b-instruct-q4_k_m.gguf"

Q8_0_BLOCK_BYTES = 34       # 2-byte fp16 scale + 32 int8 quants
Q8_0_VALUES_PER_BLOCK = 32  # dequantized values produced per block


def dequant_q8_0_row(row):
    """Dequantize one Q8_0-packed row of raw bytes to a float vector.

    Args:
        row: 1-D uint8 array (or buffer) whose length is a whole number
            of 34-byte Q8_0 blocks.

    Returns:
        1-D float64 ndarray with 32 values per block, each computed as
        quant * scale using the block's fp16 scale.

    Raises:
        ValueError: if the row length is not a multiple of 34.
    """
    raw = np.ascontiguousarray(row, dtype=np.uint8)
    n_blocks, rem = divmod(raw.size, Q8_0_BLOCK_BYTES)
    if rem:
        raise ValueError(
            f"row length {raw.size} is not a multiple of {Q8_0_BLOCK_BYTES}"
        )
    blocks = raw.reshape(n_blocks, Q8_0_BLOCK_BYTES)
    # '<f2' makes the little-endian GGUF layout explicit regardless of host
    # byte order; the .copy() gives .view() the contiguous buffer it needs.
    scales = blocks[:, :2].copy().view('<f2').astype(np.float64)   # (n, 1)
    quants = blocks[:, 2:].view(np.int8).astype(np.float64)        # (n, 32)
    return (quants * scales).ravel()


def _print_row_stats(values):
    """Print the first/last five entries and the sum of a dequantized row."""
    print(f" First 5: {values[:5]}")
    print(f" Last 5: {values[-5:]}")
    print(f" Sum: {values.sum():.6f}")


def main():
    """Dump and sanity-check dequantized rows of output.weight, then show
    the layout of token_embd.weight for comparison."""
    reader = GGUFReader(model_path)

    for tensor in reader.tensors:
        if tensor.name == 'output.weight':
            print("=== output.weight ===")
            print(f"Shape (GGUF ne): {tensor.shape}")
            print(f"Type: {tensor.tensor_type}")
            print(f"Data shape: {tensor.data.shape}")
            data = tensor.data

            # Row 0 = logit weights for vocab token 0.
            dequant_row0 = dequant_q8_0_row(data[0])
            print("\nFirst vocab token (token 0) weights:")
            print(f" Length: {len(dequant_row0)}")
            _print_row_stats(dequant_row0)

            # Row 17 = vocab token 17 (presumably the character '2' in the
            # tokenizer vocab — confirm against the model's vocab dump).
            dequant_row17 = dequant_q8_0_row(data[17])
            print("\nToken 17 ('2') weights:")
            _print_row_stats(dequant_row17)

            np.save('/tmp/output_weight_row0.npy', dequant_row0)
            np.save('/tmp/output_weight_row17.npy', dequant_row17)
            print("\nSaved dequantized rows to /tmp/output_weight_row{0,17}.npy")

            # Dequantize the first few rows to confirm the row/column layout.
            print("\n=== Full dequantization verification ===")
            all_dequant = np.array(
                [dequant_q8_0_row(data[i]) for i in range(min(10, data.shape[0]))]
            )
            print(f"Dequantized shape: {all_dequant.shape}")
            print("This confirms: rows = vocab tokens, columns = hidden dims")
            break

    print("\n\n=== token_embd.weight ===")
    for tensor in reader.tensors:
        if tensor.name == 'token_embd.weight':
            print(f"Shape (GGUF ne): {tensor.shape}")
            print(f"Type: {tensor.tensor_type}")
            print(f"Data shape: {tensor.data.shape}")
            print("Note: Q4_K is more complex, but layout principle is same")
            print("Each row (vocab token) has its own packed hidden dim weights")
            break


if __name__ == "__main__":
    main()