ogenius 0.1.5

Lightweight AI inference server using HuggingFace models directly - a simpler alternative to Ollama
Documentation
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Genius Chat</title>
    <style>
        /* Theme tokens for the dark UI; every rule below reads them through var(). */
        :root {
            /* Surfaces, darkest to lightest: page, assistant bubble, header/input bars. */
            --bg: #0a0a0c;
            --surface: #131317;
            --header: #1a1a20;

            /* Text: primary body colour and the dimmer tone used for secondary text. */
            --text: #e2e8f0;
            --text-muted: #94a3b8;

            /* Brand colour: user bubbles, buttons, focus border, thinking marker. */
            --accent: #4f46e5;
        }

        /* Page shell: a full-viewport column holding header, chat log and input bar. */
        body {
            display: flex;
            flex-direction: column;
            height: 100vh;
            margin: 0;
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
            color: var(--text);
            background: var(--bg);
        }

        /* Top bar: title on the left, controls on the right. */
        header {
            display: flex;
            align-items: center;
            justify-content: space-between;
            padding: 1rem 2rem;
            border-bottom: 1px solid #ffffff0a;
            background: var(--header);
        }

        /* Chat log: takes all remaining height and scrolls on its own. */
        #chat-container {
            display: flex;
            flex-direction: column;
            flex: 1;
            gap: 1rem;
            padding: 2rem;
            overflow-y: auto;
        }

        /* Shared bubble shape; the script adds .user or .assistant next to .message. */
        .message {
            max-width: 80%;
            border-radius: 12px;
            padding: 1rem;
            line-height: 1.5;
        }

        /* User bubbles: accent-coloured, aligned to the right edge. */
        .user {
            color: white;
            background: var(--accent);
            align-self: flex-end;
        }

        /* Assistant bubbles: subtle surface with a hairline border, aligned left. */
        .assistant {
            border: 1px solid #ffffff0a;
            background: var(--surface);
            align-self: flex-start;
        }

        /* Streamed "thinking" text: muted italic block marked by an accent bar. */
        .thinking {
            margin-bottom: 0.5rem;
            padding: 0.5rem;
            border-left: 2px solid var(--accent);
            background: #ffffff05;
            color: var(--text-muted);
            font-size: 0.9rem;
            font-style: italic;
        }

        /* Bottom bar: text box and Send button laid out in a row. */
        #input-area {
            display: flex;
            gap: 1rem;
            padding: 1.5rem 2rem;
            border-top: 1px solid #ffffff0a;
            background: var(--header);
        }

        /*
         * Message text box. Scoped by id instead of the bare `input` selector so
         * the "Show Thinking" checkbox in the header keeps its native appearance
         * and, importantly, its native keyboard focus outline.
         */
        #user-input {
            flex: 1;
            background: var(--bg);
            border: 1px solid #ffffff14;
            color: white;
            padding: 0.75rem 1rem;
            border-radius: 8px;
            /* The default outline is replaced by the accent border on focus below. */
            outline: none;
        }

        #user-input:focus {
            border-color: var(--accent);
        }

        /*
         * The script disables the box while a reply is streaming. The explicit
         * `color: white` above overrides the browser's greyed-out text, so dim
         * the control to make the locked state visible.
         */
        #user-input:disabled {
            opacity: 0.6;
            cursor: not-allowed;
        }

        /* Primary action button (Send). */
        button {
            background: var(--accent);
            color: white;
            border: none;
            padding: 0.75rem 1.5rem;
            border-radius: 8px;
            cursor: pointer;
            font-weight: 600;
        }

        /* Hover feedback only while the button can actually be pressed. */
        button:hover:not(:disabled) {
            opacity: 0.9;
        }

        /*
         * The script disables the Send button while a reply is streaming; with a
         * custom background the browser shows no difference, so fade it and drop
         * the pointer cursor.
         */
        button:disabled {
            opacity: 0.5;
            cursor: not-allowed;
        }
    </style>
</head>

<body>
    <!-- Top bar: app title plus the controls the script reads (thinking toggle, model picker). -->
    <header>
        <!-- Page heading; margin reset so the bar keeps the height it had with a plain div. -->
        <h1 style="margin: 0; font-weight: bold; font-size: 1.2rem;">ogenius</h1>
        <div style="display: flex; gap: 1rem; align-items: center;">
            <!-- When unchecked, the script drops incoming Thought events instead of rendering them. -->
            <label style="font-size: 0.8rem; color: var(--text-muted);">
                <input type="checkbox" id="thinking-toggle" checked> Show Thinking
            </label>
            <!-- Options are replaced by the script once /v1/models has been fetched. -->
            <select id="model-select" aria-label="Model">
                <option>Loading models...</option>
            </select>
        </div>
    </header>

    <!-- Conversation log; role="log" lets assistive technology announce messages appended by the script. -->
    <div id="chat-container" role="log" aria-label="Conversation"></div>

    <!-- Composer: the script sends the text on Enter or on a click of Send. -->
    <div id="input-area">
        <!-- aria-label supplies the accessible name; the placeholder alone is not a label. -->
        <input type="text" id="user-input" placeholder="Type a message..." autocomplete="off" aria-label="Message">
        <!-- Explicit type so the button never acts as a submit button if a form is added around it. -->
        <button type="button" id="send-btn">Send</button>
    </div>

    <script>
        // DOM handles, resolved once at startup.
        const chatContainer = document.querySelector('#chat-container');
        const userInput = document.querySelector('#user-input');
        const sendBtn = document.querySelector('#send-btn');
        const modelSelect = document.querySelector('#model-select');
        const thinkingToggle = document.querySelector('#thinking-toggle');

        // Active WebSocket; assigned by connectWS().
        let ws = null;
        // Elements receiving the reply that is currently streaming; null between replies.
        let currentAssistantMsg = null;
        let currentThinkingBlock = null;

        /**
         * Fetch the model list from `/v1/models` and fill the model dropdown.
         *
         * Expects a JSON body whose `data` property is an array of objects with
         * an `id`; each entry becomes one <option> with `id` as value and label.
         * A missing or non-array `data` yields an empty dropdown. A network
         * failure, a non-2xx status or an unparsable body leaves a single
         * "Error loading models" option instead.
         *
         * @returns {Promise<void>} Resolves once the dropdown is updated; never rejects.
         */
        async function loadModels() {
            try {
                const res = await fetch('/v1/models');
                // fetch() only rejects on network errors, so check the status explicitly.
                if (!res.ok) {
                    throw new Error(`HTTP ${res.status}`);
                }
                const result = await res.json();
                const models = Array.isArray(result.data) ? result.data : [];
                // Build the options as DOM nodes rather than an HTML string so a
                // model id containing markup or quotes cannot inject HTML.
                const options = models.map((m) => {
                    const opt = document.createElement('option');
                    opt.value = m.id;
                    opt.textContent = m.id;
                    return opt;
                });
                modelSelect.replaceChildren(...options);
            } catch (e) {
                console.error('Failed to load models:', e);
                modelSelect.innerHTML = '<option>Error loading models</option>';
            }
        }

        /**
         * Add a chat bubble at the end of the conversation and scroll it into view.
         *
         * @param {string} role Extra class placed next to `message` (the script uses 'user' and 'assistant').
         * @param {string} content Text shown in the bubble; set as plain text, not HTML.
         * @returns {HTMLDivElement} The new bubble, so callers can keep appending to it.
         */
        function appendMessage(role, content) {
            const bubble = document.createElement('div');
            bubble.textContent = content;
            bubble.className = 'message '.concat(role);
            chatContainer.append(bubble);
            // Keep the newest message visible.
            chatContainer.scrollTop = chatContainer.scrollHeight;
            return bubble;
        }

        /**
         * Open the WebSocket to the server and install its handlers.
         *
         * The socket targets the page's hostname on port 8081, using `wss` when
         * the page itself was loaded over https. When the socket closes, a new
         * connection attempt is scheduled after one second.
         *
         * Incoming messages are JSON. The shapes handled here are:
         *   { Event: 'ProcessStart' }        - ignored
         *   { Event: { Thought: ... } }      - passed to handleThought() if the toggle is checked
         *   { Event: { Content: ... } }      - passed to handleContent()
         *   { Event: 'Complete' }            - ends the reply and unlocks the input
         *   { Error: ... }                   - shown as an assistant message, unlocks the input
         * Anything that is not a JSON object is logged and skipped.
         */
        function connectWS() {
            const protocol = window.location.protocol === 'https:' ? 'wss' : 'ws';
            // Use the same host but port 8081 as defined in main.rs
            const wsAddr = `${protocol}://${window.location.hostname}:8081`;
            ws = new WebSocket(wsAddr);

            // Forget the reply being streamed and give the input back to the user.
            const finishResponse = () => {
                currentAssistantMsg = null;
                currentThinkingBlock = null;
                userInput.disabled = false;
                sendBtn.disabled = false;
            };

            ws.onopen = () => console.log('Connected to WebSocket');
            ws.onclose = () => {
                console.log('Disconnected from WebSocket, retrying...');
                // A reply in flight on this socket can no longer reach 'Complete';
                // without this the input would stay disabled forever.
                finishResponse();
                setTimeout(connectWS, 1000);
            };

            ws.onmessage = (event) => {
                let msg;
                try {
                    msg = JSON.parse(event.data);
                } catch (err) {
                    console.error('Ignoring non-JSON WebSocket message:', err);
                    return;
                }
                // JSON.parse can also yield null or a primitive; neither carries an event.
                if (msg === null || typeof msg !== 'object') {
                    return;
                }

                // BrainstemOutput handling
                if (msg.Event) {
                    const ev = msg.Event;
                    if (ev === 'ProcessStart') {
                        // Start of new response
                    } else if (ev.Thought) {
                        if (thinkingToggle.checked) {
                            handleThought(ev.Thought);
                        }
                    } else if (ev.Content) {
                        handleContent(ev.Content);
                    } else if (ev === 'Complete') {
                        finishResponse();
                    }
                } else if (msg.Error) {
                    appendMessage('assistant', 'Error: ' + msg.Error);
                    // NOTE(review): assumes no 'Complete' is guaranteed after an Error,
                    // so unlock here; a later 'Complete' would simply repeat the reset.
                    finishResponse();
                }
            };
        }

        /**
         * Render one Thought event into the current thinking block.
         *
         * The block is created (prefixed with "Thinking: ") on the first event
         * of a reply, whatever that event is. Only events carrying a `Delta`
         * add text; the 'Start' and 'Stop' markers change nothing visible.
         *
         * @param {'Start'|'Stop'|{Delta: string}} thought Payload of the Thought event.
         */
        function handleThought(thought) {
            if (!currentThinkingBlock) {
                const block = document.createElement('div');
                block.className = 'thinking';
                block.textContent = 'Thinking: ';
                chatContainer.appendChild(block);
                currentThinkingBlock = block;
            }

            // 'Start' (block already exists) and 'Stop' (done thinking) need no work.
            if (thought !== 'Start' && thought.Delta) {
                currentThinkingBlock.textContent += thought.Delta;
            }

            chatContainer.scrollTop = chatContainer.scrollHeight;
        }

        /**
         * Append a chunk of reply text to the assistant bubble being streamed,
         * creating that bubble on the first chunk, then scroll to the bottom.
         *
         * @param {string} content Text chunk taken from a Content event.
         */
        function handleContent(content) {
            currentAssistantMsg = currentAssistantMsg || appendMessage('assistant', '');
            currentAssistantMsg.textContent += content;
            chatContainer.scrollTop = chatContainer.scrollHeight;
        }

        /**
         * Send the text box content to the server as a prompt.
         *
         * Does nothing when the trimmed text is empty or the socket is not open.
         * Otherwise it shows the text as a user bubble, clears and disables the
         * text box and the Send button, and sends `{ prompt, model }` as JSON,
         * with `model` taken from the dropdown's current value.
         *
         * @returns {Promise<void>}
         */
        async function sendMessage() {
            const prompt = userInput.value.trim();
            if (!prompt) return;
            if (ws.readyState !== WebSocket.OPEN) return;

            appendMessage('user', prompt);

            // Lock the composer until the reply finishes.
            userInput.value = '';
            for (const control of [userInput, sendBtn]) {
                control.disabled = true;
            }

            const payload = { prompt, model: modelSelect.value };
            ws.send(JSON.stringify(payload));
        }

        // Send on a click of the button or on Enter in the text box.
        sendBtn.addEventListener('click', sendMessage);
        // `keypress` is deprecated, so listen for `keydown` instead. An Enter that
        // only confirms an IME composition (isComposing) must not send the message.
        userInput.addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && !e.isComposing) sendMessage();
        });

        // Startup: fill the model dropdown and open the WebSocket.
        loadModels();
        connectWS();
    </script>
</body>

</html>