import init, {
parseGgufMetadata,
loadModelFromBytesWithProgress,
} from './pkg/oxillama_wasm.js';
// ---------------------------------------------------------------------------
// Mutable application state shared by the handlers below.
// ---------------------------------------------------------------------------

// Engine object produced by loadModelFromBytesWithProgress; reset to null at
// the start of every load attempt.
let engine = null;
// Set to true only after a load attempt completes successfully.
let modelLoaded = false;
// True while handleGenerate is running; used to reject re-entrant calls.
let generating = false;
// Set by the stop button; checked by the per-token callback during generation.
let stopRequested = false;
// Inputs staged by the two file pickers: the model file as a Uint8Array and
// the tokenizer file as text. Both must be set before loading is allowed.
let pendingModelBytes = null; let pendingTokenizerJson = null;

// ---------------------------------------------------------------------------
// DOM references, looked up once by element id.
// ---------------------------------------------------------------------------

// File pickers for the model and the tokenizer.
const modelFileInput = document.getElementById('model-file');
const tokenizerFileInput = document.getElementById('tokenizer-file');
// Metadata preview panel and the fields filled in from parseGgufMetadata.
const modelInfoDiv = document.getElementById('model-info');
const modelArchSpan = document.getElementById('model-arch');
const modelCtxSpan = document.getElementById('model-ctx');
const modelTensorsSpan = document.getElementById('model-tensors');
// Load controls: trigger button, progress bar (container + fill), status text.
const loadBtn = document.getElementById('load-btn');
const loadProgressBar = document.getElementById('load-progress');
const loadProgressInner = document.getElementById('load-progress-inner');
const loadStatus = document.getElementById('load-status');
// Generation controls and output areas.
const promptInput = document.getElementById('prompt-input');
const maxTokensInput = document.getElementById('max-tokens');
const generateBtn = document.getElementById('generate-btn');
const stopBtn = document.getElementById('stop-btn');
const outputPre = document.getElementById('output');
const statsP = document.getElementById('stats');
/**
 * Syncs the load button with the staged inputs: it is clickable only when
 * both the model bytes and the tokenizer JSON are present (truthy).
 */
function updateLoadBtn() {
  const isModelMissing = !pendingModelBytes;
  const isTokenizerMissing = !pendingTokenizerJson;
  loadBtn.disabled = isModelMissing || isTokenizerMissing;
}
/**
 * Formats a byte count as a short human-readable size.
 *
 * Units are binary multiples (1 KB = 1024 bytes, 1 MB = 1024 KB):
 *   - below 1 KB the exact byte count is shown ("300 B"), so a small but
 *     non-empty file is never reported as "0 KB";
 *   - from 1 KB up to 1 MB the value is rounded to whole kilobytes;
 *   - from 1 MB upwards one decimal place is shown.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The formatted size, e.g. "300 B", "12 KB", "4.2 MB".
 */
function fmtSize(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  const formatMb = (value) => `${(value / MIB).toFixed(1)} MB`;

  if (bytes >= MIB) {
    return formatMb(bytes);
  }
  if (bytes < KIB) {
    return `${bytes} B`;
  }

  const wholeKb = Math.round(bytes / KIB);
  // Values just under 1 MB round up to 1024 KB; show them as "1.0 MB" instead.
  return wholeKb >= KIB ? formatMb(bytes) : `${wholeKb} KB`;
}
/**
 * Initializes the WASM module and wires up all page event handlers.
 *
 * Handlers registered:
 *   - model picker: reads the file into `pendingModelBytes` and, when
 *     `parseGgufMetadata` succeeds, fills in the metadata preview;
 *   - tokenizer picker: reads the file as text into `pendingTokenizerJson`;
 *   - load / generate buttons: delegate to `handleLoadModel` / `handleGenerate`;
 *   - stop button: sets `stopRequested`.
 *
 * File-read failures are reported in the status line instead of surfacing as
 * unhandled rejections, and the status text reflects which inputs are
 * actually staged.
 *
 * @returns {Promise<void>} Resolves once `init()` has completed and the
 *   listeners are attached; rejects if `init()` rejects.
 */
async function main() {
  await init();

  // Bumped on every selection so that a slow read of an earlier file cannot
  // overwrite the result (or status text) of a later selection.
  let modelSelectionId = 0;
  let tokenizerSelectionId = 0;

  modelFileInput.addEventListener('change', async (event) => {
    const file = event.target.files[0];
    if (!file) return;
    const selectionId = ++modelSelectionId;

    loadStatus.textContent = `Reading ${file.name} (${fmtSize(file.size)})…`;
    modelInfoDiv.hidden = true;
    pendingModelBytes = null;
    updateLoadBtn();

    let bytes;
    try {
      bytes = new Uint8Array(await file.arrayBuffer());
    } catch (err) {
      if (selectionId !== modelSelectionId) return;
      loadStatus.textContent = `Failed to read ${file.name}: ${err}`;
      console.error('Model file read failed:', err);
      return;
    }
    // A newer selection started while this file was being read; drop this one.
    if (selectionId !== modelSelectionId) return;
    pendingModelBytes = bytes;

    try {
      const meta = parseGgufMetadata(bytes);
      modelArchSpan.textContent = meta.arch || 'unknown arch';
      modelCtxSpan.textContent = meta.context_length ? `ctx ${meta.context_length}` : '';
      modelTensorsSpan.textContent = meta.tensor_count ? `${meta.tensor_count} tensors` : '';
      modelInfoDiv.hidden = false;
    } catch (err) {
      // The preview is best-effort: the bytes stay staged and the preview
      // panel stays hidden, but the failure is logged rather than swallowed.
      console.warn('GGUF metadata preview unavailable:', err);
    }

    const nextStep = pendingTokenizerJson
      ? 'Click "Load Model" to initialize the engine.'
      : 'Now pick tokenizer.json.';
    loadStatus.textContent = `GGUF ready (${fmtSize(file.size)}). ${nextStep}`;
    updateLoadBtn();
  });

  tokenizerFileInput.addEventListener('change', async (event) => {
    const file = event.target.files[0];
    if (!file) return;
    const selectionId = ++tokenizerSelectionId;

    loadStatus.textContent = `Reading tokenizer (${fmtSize(file.size)})…`;
    pendingTokenizerJson = null;
    updateLoadBtn();

    let text;
    try {
      text = await file.text();
    } catch (err) {
      if (selectionId !== tokenizerSelectionId) return;
      loadStatus.textContent = `Failed to read ${file.name}: ${err}`;
      console.error('Tokenizer file read failed:', err);
      return;
    }
    if (selectionId !== tokenizerSelectionId) return;
    pendingTokenizerJson = text;

    loadStatus.textContent = pendingModelBytes
      ? 'Both files ready. Click "Load Model" to initialize the engine.'
      : 'Tokenizer ready. Now pick a GGUF model file.';
    updateLoadBtn();
  });

  loadBtn.addEventListener('click', handleLoadModel);
  generateBtn.addEventListener('click', handleGenerate);
  stopBtn.addEventListener('click', () => {
    stopRequested = true;
  });
}
/**
 * Builds the inference engine from the staged model bytes and tokenizer JSON.
 *
 * While loading, the prompt controls and the load button are disabled and the
 * progress bar is shown. On success the engine is stored in `engine`,
 * `modelLoaded` is set, and the prompt controls are enabled. On failure the
 * error is shown in the status line and logged, the progress bar is hidden,
 * and the load button is re-synced with the staged inputs so the user can
 * retry.
 *
 * Does nothing when either input is missing.
 *
 * @returns {Promise<void>}
 */
async function handleLoadModel() {
  if (!pendingModelBytes || !pendingTokenizerJson) return;

  // Snapshot the staged inputs: the file pickers may reassign (or null out)
  // the module-level variables while the load below is awaited.
  const modelBytes = pendingModelBytes;
  const tokenizerJson = pendingTokenizerJson;

  engine = null;
  modelLoaded = false;
  generateBtn.disabled = true;
  promptInput.disabled = true;
  maxTokensInput.disabled = true;
  loadBtn.disabled = true;
  loadProgressBar.hidden = false;
  loadProgressInner.style.width = '0%';
  loadStatus.textContent = 'Initializing inference engine…';

  // The callback value is treated as a percentage, as the original display
  // did; out-of-range values are clamped and non-numeric ones are ignored.
  const onProgress = (pct) => {
    const value = Number(pct);
    if (!Number.isFinite(value)) return;
    const clamped = Math.min(100, Math.max(0, value));
    loadProgressInner.style.width = `${clamped}%`;
    loadStatus.textContent = `Loading engine… ${Math.round(clamped)}%`;
  };

  try {
    engine = await loadModelFromBytesWithProgress(modelBytes, tokenizerJson, onProgress);
    loadProgressInner.style.width = '100%';
    loadStatus.textContent = `Engine ready! Model loaded (${fmtSize(modelBytes.byteLength)}).`;
    modelLoaded = true;
    promptInput.disabled = false;
    maxTokensInput.disabled = false;
    generateBtn.disabled = false;
  } catch (err) {
    loadStatus.textContent = `Load error: ${err}`;
    console.error('Model load failed:', err);
    // Do not leave a partially filled bar on screen after a failed load.
    loadProgressBar.hidden = true;
    // Re-enable the button only if both inputs are still staged.
    updateLoadBtn();
  }
}
/**
 * Runs one generation for the current prompt and streams tokens to the page.
 *
 * Does nothing when no model is loaded, a generation is already running, or
 * the trimmed prompt is empty. Each token passed to the callback is appended
 * to the output element and the running tokens-per-second figure is updated.
 * Errors from the engine are appended to the output and logged; the buttons
 * and the `generating` flag are always restored.
 *
 * The result of `engine.generate` is awaited directly, so the UI is reset
 * only after generation has finished whether the engine returns a Promise or
 * completes synchronously.
 *
 * NOTE(review): once `stopRequested` is set the callback ignores further
 * tokens, but nothing here interrupts the engine call itself. If
 * `engine.generate` blocks synchronously, the stop click cannot be processed
 * until it returns — confirm against the engine API.
 *
 * @returns {Promise<void>}
 */
async function handleGenerate() {
  if (!modelLoaded || !engine || generating) return;
  const prompt = promptInput.value.trim();
  if (!prompt) return;

  generating = true;
  stopRequested = false;
  outputPre.textContent = '';
  statsP.textContent = '';
  generateBtn.disabled = true;
  stopBtn.disabled = false;

  // Unparseable input falls back to 200; anything below 1 (including 0) is
  // clamped to 1.
  const requestedTokens = Number.parseInt(maxTokensInput.value, 10);
  const maxTokens = Number.isNaN(requestedTokens) ? 200 : Math.max(1, requestedTokens);

  let tokenCount = 0;
  const startTime = Date.now();
  const secondsElapsed = () => (Date.now() - startTime) / 1000;

  const onToken = (token) => {
    if (stopRequested) return;
    outputPre.textContent += token;
    outputPre.scrollTop = outputPre.scrollHeight;
    tokenCount++;
    const elapsed = secondsElapsed();
    if (elapsed > 0) {
      statsP.textContent = `${tokenCount} tokens · ${(tokenCount / elapsed).toFixed(1)} tok/s`;
    }
  };

  try {
    await engine.generate(prompt, maxTokens, onToken);

    if (stopRequested) {
      outputPre.textContent += '\n\n[Stopped by user]';
    }
    const elapsed = secondsElapsed();
    if (elapsed > 0 && tokenCount > 0) {
      statsP.textContent =
        `${tokenCount} tokens in ${elapsed.toFixed(2)}s · ${(tokenCount / elapsed).toFixed(1)} tok/s`;
    }
  } catch (err) {
    outputPre.textContent += `\n\n[Generation error: ${err}]`;
    console.error('Generation failed:', err);
  } finally {
    generating = false;
    generateBtn.disabled = false;
    stopBtn.disabled = true;
  }
}
// Entry point: start the app. If startup rejects, log the error and, when the
// page has a <main> element, show a red notice at its top.
main().catch((err) => {
  console.error('Fatal WASM initialization error:', err);

  const host = document.querySelector('main');
  if (!host) {
    return;
  }

  const notice = document.createElement('p');
  const { style } = notice;
  style.color = '#ef4444';
  style.marginTop = '1rem';
  notice.textContent = `Failed to initialize OxiLLaMa WASM: ${err}`;
  host.prepend(notice);
});