llm-manager 1.1.0

Terminal UI for managing LLMs
Documentation
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>llm-manager Dashboard</title>
<style>
/* Dashboard theme. Every selector below is an element or class selector;
   the ids in the markup are used only as script hooks. */

/* Global reset so spacing is controlled entirely by the rules that follow. */
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace; background: #1a1b26; color: #c0caf5; padding: 24px; }
h1 { font-size: 1.4rem; color: #a9b1d8; margin-bottom: 16px; border-bottom: 1px solid #292e42; padding-bottom: 12px; }

/* Metric cards: auto-fit grid with columns at least 280px wide. */
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 16px; margin-bottom: 20px; }
.card { background: #24283b; border: 1px solid #292e42; border-radius: 8px; padding: 16px; }
.card h2 { font-size: 0.8rem; color: #565f89; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 8px; }
.card .value { font-size: 1.8rem; font-weight: bold; }
.card .label { font-size: 0.85rem; color: #737aa2; margin-top: 4px; }
.model-name { font-size: 1.1rem; color: #9ece6a; margin-bottom: 12px; }

/* State badge. The loaded/unloaded variants override the base 0.8rem font
   size with 2rem; the loading variant keeps the base size. */
.state-badge { display: inline-block; padding: 4px 10px; border-radius: 4px; font-size: 0.8rem; font-weight: bold; }
.state-badge.loaded { background: #1a3a2a; color: #9ece6a; font-size: 2rem; }
.state-badge.unloaded { background: #3a2a1a; color: #e0af68; font-size: 2rem; }
.state-badge.loading { background: #2a2a3a; color: #7aa2f7; }

/* Usage bars. The track clips its fill (overflow: hidden); the fill animates
   width changes. The VRAM bar's colour class is swapped by the script. */
.bar-container { background: #1a1b26; border-radius: 4px; height: 8px; margin-top: 8px; overflow: hidden; }
.bar { height: 100%; border-radius: 4px; transition: width 0.3s ease; }
.bar.green { background: #9ece6a; }
.bar.yellow { background: #e0af68; }
.bar.red { background: #f7768e; }
.bar.blue { background: #7aa2f7; }

/* Connection indicator: pulsing dot while live, solid dot while off. */
.status { font-size: 0.8rem; color: #565f89; margin-top: 12px; }
.status .dot { display: inline-block; width: 6px; height: 6px; border-radius: 50%; margin-right: 6px; }
.status .dot.live { background: #9ece6a; animation: pulse 1.5s infinite; }
.status .dot.off { background: #f7768e; }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }
.meta { font-size: 0.75rem; color: #414868; margin-top: 8px; }

/* Settings tiles.
   NOTE(review): no markup in this file carries class "settings-grid"; the
   settings container is <div class="grid" id="settings-grid">, so the
   8-column rule below never matches. Confirm whether the class on the element
   or this selector is what was intended. */
 .settings-grid { display: grid; grid-template-columns: repeat(8, 1fr); gap: 8px; }
 .setting-item { background: #161622; border: 1px solid #3d3d55; border-radius: 5px; padding: 6px 10px; }
 .setting-name { font-size: 0.6rem; color: #8888aa; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 3px; }
 .setting-val { font-size: 1.3rem; color: #9ece6a; font-weight: bold; }
</style>
</head>
<body>
<!-- Page header: title, connection indicator and current model name.
     connect() rewrites the #dot class and #status-text on socket open/close;
     update() rewrites #model-name on every metrics message. -->
<h1>llm-manager Dashboard</h1>
<div class="status">
  <span class="dot off" id="dot"></span>
  <span id="status-text">Disconnected</span>
</div>
<div id="model-name" class="model-name">Connecting...</div>
<!-- Metric cards. Every element with an id here is a placeholder that
     update() overwrites on each WebSocket message; the initial text is only
     visible until the first message arrives. -->
<div class="grid">
  <div class="card">
    <h2>Status</h2>
    <span id="state-badge" class="state-badge unloaded">Waiting</span>
  </div>
  <!-- NOTE(review): "Generation Speed" (payload field tps) and "Gen" (payload
       field gen_tps) are both labelled tokens/sec; confirm the headings
       distinguish them clearly enough. -->
  <div class="card">
    <h2>Generation Speed</h2>
    <div id="tps" class="value">0</div>
    <div class="label">tokens/sec</div>
  </div>
  <div class="card">
    <h2>Prompt Speed</h2>
    <div id="prompt-tps" class="value">0</div>
    <div class="label">tokens/sec</div>
  </div>
  <div class="card">
    <h2>Latency</h2>
    <div id="latency" class="value">0 ms</div>
    <div class="label">per token</div>
  </div>
  <div class="card">
    <h2>Tokens</h2>
    <div id="ctx" class="value">0 / 0</div>
    <div class="bar-container"><div id="ctx-bar" class="bar blue" style="width:0%"></div></div>
  </div>
  <div class="card">
    <h2>Decoded</h2>
    <div id="decoded" class="value">0</div>
    <div class="label">total tokens</div>
  </div>
  <div class="card">
    <h2>Gen</h2>
    <div id="gen-tps" class="value">0</div>
    <div class="label">tokens/sec</div>
  </div>
  <!-- Placeholders for VRAM and RAM use GB with one decimal, the same format
       update() writes, so the unit does not change on the first message. -->
  <div class="card">
    <h2>VRAM</h2>
    <div id="vram" class="value">0.0 / 0.0 GB</div>
    <div class="bar-container"><div id="vram-bar" class="bar green" style="width:0%"></div></div>
  </div>
  <div class="card">
    <h2>RAM</h2>
    <div id="ram" class="value">0.0 GB</div>
  </div>
  <div class="card">
    <h2>CPU</h2>
    <div id="cpu" class="value">0%</div>
  </div>
</div>
<!-- Settings tiles: hidden initially; render_settings() fills this container
     and switches its display to grid. -->
<div class="grid" id="settings-grid" style="display:none;"></div>
<!-- Server command line: update() shows this card only while the payload has
     a truthy cmd_display, and hides it otherwise. -->
<div class="card" id="cmd-card" style="display:none;">
  <h2>Server Command</h2>
  <pre id="cmd-display" style="white-space:pre-wrap;word-break:break-all;font-size:1.3rem;color:#9ece6a;margin:0;padding:0;"></pre>
</div>
<!-- NOTE(review): nothing in the script visible in this file writes to #meta;
     confirm whether it is still needed. -->
<div class="meta" id="meta"></div>

<script>
// Shared connection state read by connect().
// `ws` is the current socket; it stays null until connect() first runs.
let ws = null;
const url = new URL(window.location.href);
// Mirror the page's scheme: https pages use wss:, anything else uses ws:.
const protocol = (url.protocol === 'https:') ? 'wss:' : 'ws:';
const { host } = url;
// Auth token: window.__WS_AUTH wins when truthy; otherwise fall back to the
// ?auth=... query parameter of the page URL (null when absent).
const auth = window.__WS_AUTH || url.searchParams.get('auth');

/**
 * Open (or re-open) the metrics WebSocket at `<ws|wss>://<host>/ws`,
 * appending `?auth=<token>` when an auth token is available.
 *
 * Side effects:
 *  - replaces the module-level `ws` with the new socket;
 *  - toggles the #dot / #status-text connection indicator on open and close;
 *  - feeds every successfully parsed JSON message to update();
 *  - schedules another connect() 2 s after the socket closes.
 */
function connect() {
  if (ws) {
    // Detach the old handlers before closing. Otherwise closing a still-open
    // socket fires its onclose, which would schedule a second reconnect timer
    // competing with the socket created below.
    ws.onopen = null;
    ws.onmessage = null;
    ws.onclose = null;
    ws.close();
  }
  const wsUrl = `${protocol}//${host}/ws${auth ? '?auth=' + encodeURIComponent(auth) : ''}`;
  ws = new WebSocket(wsUrl);

  ws.onopen = () => {
    document.getElementById('dot').className = 'dot live';
    document.getElementById('status-text').textContent = 'Live via WebSocket';
  };

  ws.onmessage = (event) => {
    // Parse and render are guarded separately so a bad frame never tears down
    // the connection, but the failure is still visible in the console instead
    // of being silently discarded.
    let m;
    try {
      m = JSON.parse(event.data);
    } catch (e) {
      console.warn('llm-manager: ignoring non-JSON WebSocket message', e);
      return;
    }
    try {
      update(m);
    } catch (e) {
      console.error('llm-manager: failed to render metrics update', e);
    }
  };

  ws.onclose = () => {
    document.getElementById('dot').className = 'dot off';
    document.getElementById('status-text').textContent = 'Disconnected';
    // Fixed 2 s retry delay; connect() replaces `ws` when it runs.
    setTimeout(connect, 2000);
  };
}

/**
 * Return m[key] unchanged, or the placeholder '-' when the value is
 * undefined or null. Falsy values such as 0, '' and false are passed through.
 */
function s(m, key) {
  const value = m[key];
  // `== null` is true for exactly undefined and null.
  return value == null ? '-' : value;
}
/**
 * Format m[key] for display.
 *  - undefined / null  -> '-'
 *  - boolean           -> 'On' / 'Off'
 *  - non-integer number -> fixed to two decimals
 *  - anything else     -> String(value)
 */
function fmt(m, key) {
  const raw = m[key];
  if (raw == null) return '-';
  switch (typeof raw) {
    case 'boolean':
      return raw ? 'On' : 'Off';
    case 'number':
      // Integers print as-is; everything else (including NaN and Infinity)
      // goes through toFixed(2).
      return Number.isInteger(raw) ? String(raw) : raw.toFixed(2);
    default:
      return String(raw);
  }
}
/**
 * Format an optional setting for display. Same rules as a regular formatted
 * value, except that a missing (undefined / null) value reads 'Off':
 *  - undefined / null / false -> 'Off'
 *  - true                     -> 'On'
 *  - non-integer number       -> fixed to two decimals
 *  - anything else            -> String(value)
 */
function fmt_opt(m, key) {
  const value = m[key];
  const missing = value === undefined || value === null;
  if (missing || value === false) return 'Off';
  if (value === true) return 'On';
  const fractional = typeof value === 'number' && !Number.isInteger(value);
  return fractional ? value.toFixed(2) : String(value);
}
/**
 * Rebuild the settings panel (#settings-grid) from a metrics payload and make
 * it visible.
 *
 * Each entry becomes a `.setting-item` tile holding a `.setting-name` label
 * and a `.setting-val` value. Values come straight from the server payload,
 * so they are inserted with textContent rather than interpolated into an HTML
 * string; markup characters in a value are shown literally, never parsed.
 *
 * @param {Object} m  Parsed metrics message; fields are read by key via
 *                    s(), fmt() and fmt_opt().
 */
function render_settings(m) {
  const grid = document.getElementById('settings-grid');
  grid.style.display = 'grid';

  // Backend label gets a " v<version>" suffix only when a version is present.
  const version = s(m, 'llama_cpp_version');
  const backend = s(m, 'backend') + (version !== '-' ? ' v' + version : '');

  const items = [
    ['Backend', backend],
    ['Threads', s(m, 'threads')],
    ['Threads Batch', s(m, 'threads_batch')],
    ['Context', s(m, 'context_length')],
    ['Batch Size', s(m, 'batch_size')],
    ['Ubatch Size', s(m, 'ubatch_size')],
    ['Temp', fmt(m, 'temperature')],
    ['Top-k', s(m, 'top_k')],
    ['Top-p', fmt(m, 'top_p')],
    ['Min P', fmt(m, 'min_p')],
    ['Typical P', fmt(m, 'typical_p')],
    ['Seed', s(m, 'seed')],
    ['Repeat Penalty', fmt(m, 'repeat_penalty')],
    ['Repeat Last N', s(m, 'repeat_last_n')],
    ['Presence Penalty', fmt_opt(m, 'presence_penalty')],
    ['Freq Penalty', fmt_opt(m, 'frequency_penalty')],
    ['Mirostat', fmt_opt(m, 'mirostat')],
    ['Mirostat LR', fmt_opt(m, 'mirostat_lr')],
    ['Mirostat Ent', fmt_opt(m, 'mirostat_ent')],
    ['Max Tokens', fmt_opt(m, 'max_tokens')],
    ['Flash Attention', fmt(m, 'flash_attn')],
    ['KV Cache Offload', fmt(m, 'kv_cache_offload')],
    ['Cache Type K', fmt_opt(m, 'cache_type_k')],
    ['Cache Type V', fmt_opt(m, 'cache_type_v')],
    // NOTE(review): label says "Unified" but the key is 'uniform_cache';
    // confirm the key against the server payload.
    ['Unified KV', fmt(m, 'uniform_cache')],
    ['Mlock', fmt(m, 'mlock')],
    ['Mmap', fmt(m, 'mmap')],
    ['Embedding', fmt(m, 'embedding')],
    ['Jinja', fmt(m, 'jinja')],
    ['Ignore EOS', fmt(m, 'ignore_eos')],
    ['Expert Count', s(m, 'expert_count')],
    ['GPU Layers', s(m, 'gpu_layers')],
    ['Spec Type', s(m, 'spec_type')],
    ['Spec Draft N Max', s(m, 'draft_tokens')],
  ];

  // Build all tiles off-DOM, then swap them in with a single append.
  const tiles = document.createDocumentFragment();
  for (const [name, val] of items) {
    const tile = document.createElement('div');
    tile.className = 'setting-item';

    const label = document.createElement('div');
    label.className = 'setting-name';
    label.textContent = name;

    const value = document.createElement('div');
    value.className = 'setting-val';
    value.textContent = String(val);

    tile.appendChild(label);
    tile.appendChild(value);
    tiles.appendChild(tile);
  }
  grid.textContent = '';  // drop the previous render
  grid.appendChild(tiles);
}
/**
 * Refresh every dashboard widget from one parsed metrics message.
 *
 * Numeric fields are coerced defensively: a missing, null or non-numeric
 * field is rendered as 0 rather than throwing from `.toFixed()` and aborting
 * the remainder of the refresh (which previously left later widgets, the
 * settings panel and the command card stale).
 *
 * @param {Object} m  Parsed metrics message. Fields read here: model_name,
 *                    state, loaded, tps, prompt_tps, gen_tps,
 *                    latency_per_token_ms, ctx_used, ctx_max, cpu_usage,
 *                    gpu_mem_used, gpu_mem_total, ram_used, decoded_tokens,
 *                    cmd_display; the whole object is also handed to
 *                    render_settings(). Non-object payloads are ignored.
 */
function update(m) {
  // JSON.parse can yield null or a primitive; there is nothing to render.
  if (m === null || typeof m !== 'object') return;

  // Coerce to a finite number, defaulting to 0.
  const num = (v) => {
    const n = Number(v);
    return Number.isFinite(n) ? n : 0;
  };
  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };
  // Memory fields are divided by 1024^3 and labelled GB.
  const BYTES_PER_GB = 1024 * 1024 * 1024;

  setText('model-name', m.model_name || 'No model loaded');

  // NOTE(review): the stylesheet also defines a `.loading` badge variant that
  // is never selected here; confirm whether m.state should map to it.
  const badge = document.getElementById('state-badge');
  badge.textContent = m.state || 'unknown';
  badge.className = 'state-badge ' + (m.loaded ? 'loaded' : 'unloaded');

  setText('tps', num(m.tps).toFixed(1));
  setText('prompt-tps', num(m.prompt_tps).toFixed(1));
  setText('gen-tps', num(m.gen_tps).toFixed(1));
  setText('latency', num(m.latency_per_token_ms).toFixed(1) + ' ms');
  setText('decoded', String(num(m.decoded_tokens)));
  setText('cpu', num(m.cpu_usage).toFixed(0) + '%');

  // Context usage: the maximum is left blank when it is unknown or zero.
  const ctxUsed = num(m.ctx_used);
  const ctxMax = num(m.ctx_max);
  setText('ctx', `${ctxUsed} / ${ctxMax || ''}`);
  const ctxPct = ctxMax > 0 ? (ctxUsed / ctxMax * 100) : 0;
  document.getElementById('ctx-bar').style.width = ctxPct + '%';

  // VRAM: text plus a bar that turns yellow above 50% and red above 80%.
  const vramUsed = num(m.gpu_mem_used);
  const vramTotal = num(m.gpu_mem_total);
  setText('vram', vramTotal > 0
    ? `${(vramUsed / BYTES_PER_GB).toFixed(1)} / ${(vramTotal / BYTES_PER_GB).toFixed(1)} GB`
    : '0.0 / 0.0 GB');
  const vramPct = vramTotal > 0 ? (vramUsed / vramTotal * 100) : 0;
  const vramBar = document.getElementById('vram-bar');
  vramBar.style.width = vramPct + '%';
  vramBar.className = 'bar ' + (vramPct > 80 ? 'red' : vramPct > 50 ? 'yellow' : 'green');

  setText('ram', (num(m.ram_used) / BYTES_PER_GB).toFixed(1) + ' GB');

  render_settings(m);

  // The command card is visible only while the server reports a command line.
  const cmd = m.cmd_display;
  const cmdCard = document.getElementById('cmd-card');
  if (cmd) {
    cmdCard.style.display = 'block';
    document.getElementById('cmd-display').textContent = cmd;
  } else {
    cmdCard.style.display = 'none';
  }
}

// Start the first connection attempt; the onclose handler installed by
// connect() schedules every subsequent retry.
connect();
</script>
</body>
</html>