Skip to main content

mur_common/
mobile.rs

//! Wire protocol for the MUR mobile app ↔ Mac daemon WebSocket endpoint.
//!
//! Shared by `mur-mobile-sdk` (the phone client) and `mur-daemon` (the Mac
//! endpoint) so both ends agree on the framing. Every message is one JSON
//! object sent as a WebSocket *text* frame. The first client frame is a
//! [`ClientFrame::Hello`] pairing handshake; once the server replies
//! [`ServerFrame::Paired`], application traffic is carried as Ed25519-signed
//! [`SignedEnvelope`]s whose `payload` is the canonical JSON of an A2A
//! `JsonRpcRequest` — the same crypto MUR uses for agent↔agent bridge traffic.
//!
//! P3 adds a voice streaming path: the phone streams raw 16 kHz mono f32 PCM
//! chunks, the Mac runs whisper.cpp for an authoritative transcript, then
//! replies with Kokoro TTS audio chunks.
//!
//! Design: `docs/superpowers/specs/2026-06-05-mur-voice-mobile-app-design.md`.
16
17use crate::bridge::envelope::SignedEnvelope;
18use serde::{Deserialize, Serialize};
19
/// URL path on which the daemon's mobile endpoint serves its WebSocket.
pub const MOBILE_WS_PATH: &str = "/api/v1/mobile/ws";
22
23/// Frames the phone sends to the Mac endpoint.
24#[derive(Debug, Serialize, Deserialize)]
25#[serde(tag = "type", rename_all = "snake_case")]
26pub enum ClientFrame {
27    /// Pairing / auth handshake. `token` is the one-time token from the QR
28    /// code; `pubkey` is the phone's multibase Ed25519 public key, which the
29    /// Mac records as a paired device on success. `agent` is the canonical
30    /// agent name the phone wants to talk to (e.g. `"mur"`).
31    Hello {
32        pubkey: String,
33        token: String,
34        agent: String,
35    },
36    /// A signed A2A request destined for the agent (text-only path).
37    Envelope { envelope: SignedEnvelope },
38    /// Phone begins a voice utterance. The Mac clears its audio accumulator and
39    /// prepares for incoming chunks at `sample_rate` Hz, mono f32.
40    AudioStreamStart { sample_rate: u32 },
41    /// One chunk of raw PCM (f32 LE, `sample_rate` Hz mono, standard base64).
42    /// Authenticated by the connection (paired at `Hello`); no per-chunk sig.
43    AudioChunk { data: String },
44    /// Phone finished speaking. Mac should run STT on the accumulated audio,
45    /// forward to the agent, and stream TTS audio back.
46    AudioStreamEnd,
47}
48
49/// Frames the Mac endpoint sends back to the phone.
50#[derive(Debug, Serialize, Deserialize)]
51#[serde(tag = "type", rename_all = "snake_case")]
52pub enum ServerFrame {
53    /// Handshake accepted; the phone is now paired with `agent`.
54    Paired { agent: String },
55    /// The handshake or a later frame was rejected.
56    Rejected { reason: String },
57    /// An asynchronous event mirrored to the phone. `name` is dot-namespaced
58    /// (`mobile.transcript`, `mobile.reply`, …) to match the Hub `EventBus`
59    /// names used for desktop mirroring.
60    Event {
61        name: String,
62        payload: serde_json::Value,
63    },
64    /// Mac whisper.cpp authoritative transcript for the user's just-spoken
65    /// utterance. Overrides the on-device SFSpeech partial. `is_final: true`
66    /// means this is the definitive text for this turn.
67    Transcript { text: String, is_final: bool },
68    /// A chunk of Kokoro TTS audio (f32 LE PCM, 24 kHz mono). The phone
69    /// accumulates chunks until `done: true`, then plays them back.
70    /// `base64` is the standard base64 encoding of the raw bytes.
71    AudioChunk {
72        base64: String,
73        sample_rate: u32,
74        done: bool,
75    },
76}