// mobux 0.7.0
//
// A touch-friendly tmux web UI for unhinged people who run terminal sessions
// from their phone while walking the dog.
// input-actions.js — shared 📎 attach and 🎤 dictate actions.
//
// These two actions are the "unreachable on a non-touch browser" features:
// xterm.js owns the keyboard on desktop, so there are no shortcuts for them.
// Both the mobile input bar (input-bar.js) and the desktop top bar
// (top-bar.js) drive the SAME flows from here — one upload path, one mic
// capture/transcribe path, one set of `mic.*` telemetry events.
//
// Each factory returns a small handle with a trigger and (for dictation) the
// recording state. Callers own their own button DOM and pass it in so the
// action can reflect state (label / `.mic-recording`) on whichever button is
// visible; UI-only details (focus restore, error toasts) are injected via
// callbacks so behavior stays identical per surface.

import telemetry from './telemetry.js';
import { createMicOverlay } from './mic-overlay.js';

// ── File attach (any file type) ─────────────────────────────────────
// Owns a hidden <input type=file>, POSTs the picked file to /api/upload via
// the mesh relay (the returned path is only valid on the terminal's host),
// and drops the path into the terminal via send().
//
//   createAttachAction({ send, onError }) → { trigger() }
//     onError(message)  optional — surface an upload failure in the UI.
// Build the 📎 attach action.
//   send(path)        injects the uploaded file's path into the terminal.
//   onError(message)  optional — called with a short message when the upload fails.
// Returns { trigger() }, which opens the browser's file picker.
export function createAttachAction({ send, onError } = {}) {
  // Hidden picker owned by this action; callers only ever call trigger().
  const fileInput = document.createElement('input');
  fileInput.type = 'file';
  fileInput.accept = '*/*';
  fileInput.style.display = 'none';
  document.body.appendChild(fileInput);

  // POST one file to /api/upload and inject the returned path.
  // Throws on a non-OK response or a response that carries no usable path.
  async function uploadFile(file) {
    const form = new FormData();
    form.append('file', file);
    // Upload to whichever host drives the terminal: the returned path is only
    // meaningful on that host's filesystem, so it must go through the relay.
    const res = await window.MobuxMesh.apiFetch('/api/upload', { method: 'POST', body: form });
    if (!res.ok) throw new Error(await res.text());
    const { path } = await res.json();
    // Never type "undefined" (or nothing) into the terminal: a response
    // without a path is an upload failure, not something to inject.
    if (typeof path !== 'string' || path === '') {
      throw new Error('Upload response did not include a path');
    }
    // Send path directly to terminal, ready to use.
    send(path);
  }

  fileInput.addEventListener('change', async () => {
    const file = fileInput.files?.[0];
    if (!file) return;
    try {
      await uploadFile(file);
    } catch (err) {
      console.error('Upload failed:', err);
      onError?.('Attach failed: upload error');
    } finally {
      // Reset so the same file can be re-selected — even if onError threw.
      fileInput.value = '';
    }
  });

  return {
    trigger() { fileInput.click(); },
  };
}

// ── Speech-to-text (dictation) ──────────────────────────────────────
// Capture mic audio with Web Audio (NOT MediaRecorder — we need raw PCM),
// downsample to 16 kHz mono, encode a 16-bit WAV client-side, POST it to
// /transcribe (same-origin, so the session cookie rides along), then inject
// the returned text into the terminal exactly like the green send button.
//
//   createDictateAction({ send, button, onText }) → { trigger(), toggle(), isRecording() }
//     button   the 🎤 button element — gets `.mic-recording` + label updates.
//     onText() optional — invoked after a successful injection (e.g. refocus
//              the mobile text input). The injection itself always happens.
// Sample rate (Hz) of the WAV that encodeWav produces; also written into the
// WAV header's sample-rate and byte-rate fields.
const TARGET_RATE = 16000;
// Longest single recording, in seconds. startRecording's countdown stops the
// capture once this much time has elapsed.
const MAX_SECONDS = 60;

export function createDictateAction({ send, button, onText } = {}) {
  // Mutable state for one dictation action, shared by every function in this
  // factory.
  const mic = {
    // True while audio is being captured (set in startRecording, cleared when
    // capture stops, is cancelled, dismissed, or faults).
    recording: false,
    // Claimed at the top of startRecording; while true, trigger()/toggle() and
    // startRecording itself are no-ops. Released by fault/cancel/dismiss/submit.
    busy: false,
    // MediaStream returned by getUserMedia; its tracks are stopped in stopTracks.
    stream: null,
    // AudioContext for the capture graph; closed in stopTracks.
    ctx: null,
    // MediaStreamSource node created from `stream`.
    source: null,
    // AnalyserNode between source and processor; handed to the overlay's
    // showRecording for the waveform.
    analyser: null,
    // ScriptProcessor node whose onaudioprocess collects PCM into `chunks`.
    processor: null,
    // Float32Array copies of channel 0, one per audio callback, in capture order.
    chunks: [],
    // Sample rate of `ctx` (Hz) at capture time.
    inputRate: 0,
    // setInterval handle for the countdown tick; null when not running.
    timer: null,
    // Epoch ms at which the countdown stops the capture.
    deadline: null,
    // Date.now() when capture began; used to report durationMs.
    startedAt: 0,
    // While true, incoming audio buffers are dropped instead of stored.
    paused: false,
    // Audio stashed by stopCapture for (re)transcription. An array (possibly
    // empty) once a capture has been stopped; null once consumed or discarded.
    pendingChunks: null,
    // Sample rate (Hz) that `pendingChunks` was captured at.
    pendingRate: 0,
    // Wall-clock length of the stashed capture, in ms (telemetry only).
    pendingDurationMs: 0,
  };

  // Write `text` onto the caller-supplied 🎤 button; no-op when no button was given.
  function micLabel(text) {
    if (!button) return;
    button.textContent = text;
  }

  // Full-viewport overlay with five states. Each gesture it reports is routed
  // back into the capture functions below through these callbacks.
  const micOverlay = createMicOverlay({
    // Stop → transcribe → REVIEW. Ignored unless a capture is running.
    onStop() {
      if (!mic.recording) return;
      captureStop();
    },
    // Stop → transcribe → submit in one tap. Ignored unless a capture is running.
    onFastSubmit() {
      if (!mic.recording) return;
      captureStopAndSubmit();
    },
    // Pause/resume only flip the flag; the audio callback checks it per buffer.
    onPause() {
      mic.paused = true;
      telemetry.log('mic.pause');
    },
    onResume() {
      mic.paused = false;
      telemetry.log('mic.resume');
    },
    onCancel() {
      cancelRecording();
    },
    // The overlay has already removed itself here, so only mic state needs
    // resetting for the next tap to work.
    onDismiss() {
      Object.assign(mic, {
        recording: false,
        busy: false,
        paused: false,
        pendingChunks: null,
      });
      stopTracks();
      button?.classList.remove('mic-recording');
      micLabel('🎤');
    },
    // REVIEW state: the user wants another take — discard and record again.
    onRetry() {
      retryFresh();
    },
    // FAULT state: resend the captured audio when a capture was already
    // stopped; otherwise (fault raised before capture, e.g. permission or
    // secure-context) there is nothing to resend, so record afresh.
    onFaultRetry() {
      // pendingChunks is an array — possibly empty — once a capture has been
      // stopped, and null only after it was consumed or discarded, so test
      // for presence rather than length.
      const hasCapturedAudio = mic.pendingChunks !== null;
      if (hasCapturedAudio) {
        retryPendingTranscription();
        return;
      }
      retryFresh();
    },
    onSubmit(text) {
      submitText(text);
    },
    retryTranscription() {
      retryPendingTranscription();
    },
  });

  // Report a fault in both places at once — telemetry and the overlay — so
  // logs and UI never disagree, and release the recording/busy flags.
  //   kind   fault kind string, passed to telemetry and the overlay.
  //   extra  optional detail; omitted from the telemetry payload when falsy.
  //   opts   optional, passed through to micOverlay.showFault untouched.
  function micFault(kind, extra, opts) {
    const payload = { kind };
    if (extra) payload.extra = extra;
    telemetry.log('mic.fault', payload);

    button?.classList.remove('mic-recording');
    Object.assign(mic, { recording: false, busy: false });
    micLabel('🎤');
    micOverlay.showFault(kind, extra, opts);
  }

  // Turn captured Float32 chunks into a 16-bit mono PCM WAV at TARGET_RATE.
  //   chunks     array of Float32Array buffers, in capture order.
  //   inputRate  sample rate (Hz) the chunks were captured at.
  // Returns a Blob of type 'audio/wav' (44-byte header + samples).
  function encodeWav(chunks, inputRate) {
    const HEADER_BYTES = 44;
    const BYTES_PER_SAMPLE = 2;

    // Concatenate every chunk into one contiguous buffer.
    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
    const pcm = new Float32Array(totalLength);
    let cursor = 0;
    chunks.forEach((chunk) => {
      pcm.set(chunk, cursor);
      cursor += chunk.length;
    });

    // Linear-interpolation resample to TARGET_RATE (input is typically 44.1/48k).
    const resample = () => {
      const step = inputRate / TARGET_RATE;
      const resampled = new Float32Array(Math.floor(pcm.length / step));
      const lastIndex = pcm.length - 1;
      for (let n = 0; n < resampled.length; n += 1) {
        const position = n * step;
        const left = Math.floor(position);
        const right = Math.min(left + 1, lastIndex);
        const weight = position - left;
        resampled[n] = pcm[left] * (1 - weight) + pcm[right] * weight;
      }
      return resampled;
    };
    const samples = inputRate === TARGET_RATE ? pcm : resample();

    // RIFF/WAVE header, little-endian, PCM format, one channel.
    const payloadBytes = samples.length * BYTES_PER_SAMPLE;
    const wav = new DataView(new ArrayBuffer(HEADER_BYTES + payloadBytes));
    const putTag = (offset, tag) => {
      [...tag].forEach((ch, k) => wav.setUint8(offset + k, ch.charCodeAt(0)));
    };
    putTag(0, 'RIFF');
    wav.setUint32(4, 36 + payloadBytes, true);
    putTag(8, 'WAVE');
    putTag(12, 'fmt ');
    wav.setUint32(16, 16, true);               // fmt chunk size
    wav.setUint16(20, 1, true);                // format tag: PCM
    wav.setUint16(22, 1, true);                // channels: mono
    wav.setUint32(24, TARGET_RATE, true);      // sample rate
    wav.setUint32(28, TARGET_RATE * 2, true);  // byte rate
    wav.setUint16(32, 2, true);                // block align
    wav.setUint16(34, 16, true);               // bits per sample
    putTag(36, 'data');
    wav.setUint32(40, payloadBytes, true);

    // Clamp to [-1, 1] and scale asymmetrically so -1 → -32768 and 1 → 32767.
    samples.forEach((sample, n) => {
      const clamped = Math.max(-1, Math.min(1, sample));
      const scale = clamped < 0 ? 0x8000 : 0x7fff;
      wav.setInt16(HEADER_BYTES + n * BYTES_PER_SAMPLE, clamped * scale, true);
    });

    return new Blob([wav.buffer], { type: 'audio/wav' });
  }

  // Tear down the capture graph: disconnect the nodes, close the context, stop
  // the stream's tracks, cancel the countdown timer, and null every handle.
  // Safe to call when nothing is running.
  function stopTracks() {
    // Disconnect/close are best-effort; a throw there must not abort teardown.
    const quietly = (action) => {
      try { action(); } catch (_) { /* ignored on purpose */ }
    };
    const { processor, analyser, source, ctx, stream, timer } = mic;

    if (processor) {
      quietly(() => processor.disconnect());
      processor.onaudioprocess = null;
    }
    if (analyser) {
      quietly(() => analyser.disconnect());
      mic.analyser = null;
    }
    if (source) quietly(() => source.disconnect());
    if (ctx) quietly(() => ctx.close());
    if (stream) {
      for (const track of stream.getTracks()) track.stop();
    }
    if (timer) {
      clearInterval(timer);
      mic.timer = null;
    }

    mic.stream = null;
    mic.ctx = null;
    mic.source = null;
    mic.processor = null;
  }

  // Ask /api/stt/status whether the speech-to-text backend is reachable before
  // the mic is opened, so a dead backend is reported up front rather than
  // after the user has already spoken. This is the same endpoint the
  // Settings → Speech-to-text card polls.
  //
  // Resolves true when recording may proceed: either the backend reports
  // reachable, or the probe itself could not be completed. Resolves false
  // after raising a 'model' fault whose "proceed anyway" option restarts
  // recording with the probe skipped.
  async function probeSttBackend() {
    let report;
    try {
      const response = await fetch('/api/stt/status');
      report = await response.json();
    } catch (err) {
      // An inconclusive probe (e.g. offline) must not block recording; the
      // real /transcribe request raises its own fault if it has to.
      const message = err?.message || 'network error';
      telemetry.log('mic.probe.err', { message });
      return true;
    }

    const kind = report?.kind;
    const reachable = Boolean(report?.reachable);
    telemetry.log('mic.probe', { kind, reachable });
    if (reachable) return true;

    const proceedAnyway = () => {
      startRecording({ skipProbe: true });
    };
    micFault('model', (kind || 'unknown') + ' backend unreachable', {
      onProceedAnyway: proceedAnyway,
    });
    return false;
  }

  // Begin a dictation capture.
  //   opts.skipProbe  when true, skip the STT backend reachability probe
  //                   (used by the fault overlay's "proceed anyway").
  // Every failure path ends in micFault(), which releases mic.busy, so the
  // button never gets stuck. Resolves (never rejects) once recording has
  // started or a fault has been shown.
  async function startRecording(opts) {
    if (mic.busy) return;
    // Claim busy immediately so a second tap during the probe/getUserMedia
    // await can't race into a second concurrent recording attempt.
    mic.busy = true;
    // Dismiss the soft keyboard — the text input keeps focus otherwise and the
    // on-screen keyboard covers the recording overlay on mobile.
    document.activeElement?.blur?.();
    mic.paused = false;
    // Secure-context / mediaDevices availability. getUserMedia is undefined on
    // http: (non-localhost) and in unsupported webviews.
    const secure = window.isSecureContext !== false;
    const hasGUM = !!navigator.mediaDevices?.getUserMedia;
    telemetry.log('mic.secure.check', { secure, hasGetUserMedia: hasGUM });
    if (!hasGUM) {
      micFault('insecure');
      return;
    }
    // probeSttBackend raises its own fault when it resolves false.
    if (!opts?.skipProbe && !(await probeSttBackend())) return;
    telemetry.log('mic.getusermedia.req');
    try {
      mic.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
      const name = err?.name || 'Error';
      telemetry.log('mic.getusermedia.denied', { name, message: err?.message || '' });
      // Map the DOMException to a fault kind.
      if (name === 'NotFoundError' || name === 'DevicesNotFoundError') {
        micFault('notfound', name);
      } else if (name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError') {
        micFault('denied', name);
      } else {
        micFault('mic', name + ': ' + (err?.message || ''));
      }
      return;
    }
    telemetry.log('mic.getusermedia.ok');

    // Building the audio graph can throw (no AudioContext implementation, a
    // node constructor failing, …). The mic stream is already open at this
    // point, so on failure release it and surface a fault instead of leaving
    // mic.busy claimed forever with the microphone still live.
    try {
      const AC = window.AudioContext || window.webkitAudioContext;
      if (!AC) throw new Error('Web Audio API is not available');
      mic.ctx = new AC();
      mic.inputRate = mic.ctx.sampleRate;
      mic.source = mic.ctx.createMediaStreamSource(mic.stream);

      // Insert AnalyserNode between source and processor so waveform taps the
      // graph without affecting the PCM capture.
      mic.analyser = mic.ctx.createAnalyser();
      mic.analyser.fftSize = 1024;
      mic.source.connect(mic.analyser);

      mic.processor = mic.ctx.createScriptProcessor(4096, 1, 1);
      mic.analyser.connect(mic.processor);
      mic.processor.connect(mic.ctx.destination);

      mic.chunks = [];
      mic.processor.onaudioprocess = (e) => {
        // Copy the buffer: the copy is what gets stored. Paused audio is dropped.
        if (!mic.paused) {
          mic.chunks.push(new Float32Array(e.inputBuffer.getChannelData(0)));
        }
      };
    } catch (err) {
      stopTracks();
      micFault('mic', (err?.name || 'Error') + ': ' + (err?.message || ''));
      return;
    }

    mic.recording = true;
    // Re-assert busy: an overlay dismiss that landed during one of the awaits
    // above would have cleared it.
    mic.busy = true;
    mic.startedAt = Date.now();
    button?.classList.add('mic-recording');
    micOverlay.showRecording(mic.analyser);
    telemetry.log('mic.recording.start', { inputRate: mic.inputRate });
    mic.deadline = Date.now() + MAX_SECONDS * 1000;
    // Countdown shown on the button; stops the capture when it reaches zero.
    const tick = () => {
      const left = Math.max(0, Math.ceil((mic.deadline - Date.now()) / 1000));
      micLabel('' + left);
      if (left <= 0) captureStop();
    };
    tick();
    mic.timer = setInterval(tick, 250);
  }

  // End the running capture and stash its audio in mic.pending* for
  // transcription. The stash is kept across a transcription failure so a
  // fault can be retried against the same recording instead of forcing a
  // full re-record.
  // Returns false (and does nothing) when no capture is running, else true.
  function stopCapture() {
    if (!mic.recording) return false;

    mic.recording = false;
    button?.classList.remove('mic-recording');
    micLabel('');
    telemetry.log('mic.stop');

    const captured = mic.chunks;
    let elapsedMs = 0;
    if (mic.startedAt) elapsedMs = Date.now() - mic.startedAt;
    Object.assign(mic, {
      pendingChunks: captured,
      pendingRate: mic.inputRate,
      pendingDurationMs: elapsedMs,
    });

    stopTracks();
    // Fresh array so the stash is not aliased by the live capture buffer.
    mic.chunks = [];
    telemetry.log('mic.recording.stop', { durationMs: elapsedMs, chunkCount: captured.length });
    return true;
  }

  // Encode mic.pendingChunks as a WAV and POST it to /transcribe.
  // Resolves the transcript on success ('' when it is empty or only
  // whitespace) and clears mic.pendingChunks. On any failure it raises the
  // matching fault, leaves mic.pendingChunks in place for a retry, and
  // resolves null. Never rejects.
  async function transcribePending() {
    micLabel('');
    micOverlay.showTranscribing();

    try {
      const wav = encodeWav(mic.pendingChunks, mic.pendingRate);
      const body = new FormData();
      body.append('audio', wav, 'speech.wav');
      telemetry.log('mic.transcribe.req', { bytes: wav.size, durationMs: mic.pendingDurationMs });

      // A throw from fetch itself means the request never got a response.
      let response;
      try {
        response = await fetch('/transcribe', { method: 'POST', body });
      } catch (netErr) {
        const detail = netErr?.message;
        telemetry.log('mic.transcribe.err', { stage: 'network', message: detail || '' });
        micFault('network', detail || 'network error');
        return null;
      }
      const { status } = response;
      telemetry.log('mic.transcribe.resp', { status });

      if (!response.ok) {
        // The body is only diagnostic; an unreadable body counts as empty.
        const bodyText = await response.text().catch(() => '');
        telemetry.log('mic.transcribe.err', { stage: 'http', status, body: bodyText.slice(0, 200) });
        const excerpt = bodyText.slice(0, 120);
        if (status === 503) {
          micFault('model', '503 ' + excerpt);
        } else {
          micFault('http', status + ' ' + (excerpt || response.statusText));
        }
        return null;
      }

      const { text } = await response.json();
      telemetry.log('mic.transcribe.ok', { textLength: (text || '').trim().length });
      // Consumed: only now is the stashed audio released.
      mic.pendingChunks = null;
      const hasSpeech = Boolean(text && text.trim());
      return hasSpeech ? text : '';
    } catch (err) {
      // Anything else: WAV encoding, JSON parsing, an unexpected payload shape.
      console.error('Transcription failed:', err);
      telemetry.log('mic.transcribe.err', { stage: 'exception', message: err?.message || String(err) });
      micFault('mic', err?.message || 'encode/transcribe error');
      return null;
    }
  }

  // Stop → preview: end the capture, transcribe it, and hand the transcript to
  // the overlay's REVIEW state for the user to edit or confirm. Does nothing
  // when no capture is running.
  async function captureStop() {
    const stopped = stopCapture();
    if (!stopped) return;

    const transcript = await transcribePending();
    // null means a fault is already on screen and the audio is preserved.
    // mic.busy is left as-is here; submit/cancel/retry resolve it.
    if (transcript !== null) micOverlay.showReview(transcript);
  }

  // Stop → submit in one tap: end the capture, transcribe it, and send the
  // transcript straight to the terminal with no preview. An empty transcript
  // falls back to REVIEW, since there is nothing to submit. Does nothing when
  // no capture is running.
  async function captureStopAndSubmit() {
    const stopped = stopCapture();
    if (!stopped) return;

    const transcript = await transcribePending();
    // null means a fault is already on screen and the audio is preserved.
    if (transcript === null) return;

    if (transcript) {
      submitText(transcript);
      micOverlay.dismiss();
    } else {
      micOverlay.showReview(transcript);
    }
  }

  // Retry a transcription against already-captured audio (FAULT-state Retry
  // with pending audio, and the auto-retry after installing/starting a local
  // STT server). Always lands back on REVIEW — never auto-submits — so a
  // second failure or an unexpected transcript still gets a human look.
  // Does nothing when there is no stashed audio.
  async function retryPendingTranscription() {
    if (mic.pendingChunks === null) return;
    // The fault that led here released mic.busy. Re-claim it so the session
    // counts as in progress while transcribing and during REVIEW — the same
    // state a first-time Stop leaves behind — and a stray trigger() cannot
    // start a second recording underneath the overlay. A repeat failure
    // releases it again through micFault.
    mic.busy = true;
    const text = await transcribePending();
    if (text === null) return; // fault already shown, audio preserved
    micOverlay.showReview(text);
  }

  // Abandon the current session entirely: release every flag, tear down the
  // capture graph, discard live and stashed audio, restore the button, and
  // close the overlay.
  function cancelRecording() {
    Object.assign(mic, { recording: false, busy: false, paused: false });
    stopTracks();
    Object.assign(mic, { chunks: [], pendingChunks: null });

    button?.classList.remove('mic-recording');
    micLabel('🎤');
    micOverlay.dismiss();
  }

  // Throw away the current take (live and stashed audio) and start a brand-new
  // recording. Resolves when startRecording has finished starting or faulting.
  async function retryFresh() {
    telemetry.log('mic.retry');
    stopTracks();
    // busy must be released before startRecording, which refuses to run while
    // it is claimed.
    Object.assign(mic, {
      chunks: [],
      pendingChunks: null,
      recording: false,
      busy: false,
      paused: false,
    });
    micOverlay.dismiss();
    await startRecording();
  }

  // Inject a confirmed transcript into the terminal, followed by Enter, then
  // release the session (busy flag + button label).
  //   text  the transcript to send; surrounding whitespace is trimmed.
  // An empty, whitespace-only, or non-string transcript injects nothing: a
  // bare Enter would run whatever is already on the command line, and a
  // non-string would throw before mic.busy is released. The session is
  // released either way.
  function submitText(text) {
    telemetry.log('mic.submit');
    const payload = typeof text === 'string' ? text.trim() : '';
    if (payload) {
      send(payload);
      send('\r');
      // Only after a real injection, e.g. to refocus the mobile text input.
      onText?.();
    }
    mic.busy = false;
    micLabel('🎤');
  }

  return {
    trigger() {
      if (mic.busy) return;
      telemetry.log('mic.click', { action: 'start' });
      startRecording();
    },
    // Legacy compat
    toggle() {
      if (mic.busy) return;
      telemetry.log('mic.click', { action: mic.recording ? 'stop' : 'start' });
      if (mic.recording) captureStop();
      else startRecording();
    },
    isRecording() { return mic.recording; },
  };
}