mur_common/mobile.rs
1//! Wire protocol for the MUR mobile app ↔ Mac daemon WebSocket endpoint.
2//!
3//! Shared by `mur-mobile-sdk` (the phone client) and `mur-daemon` (the Mac
4//! endpoint) so both ends agree on the framing. Every message is one JSON
5//! object sent as a WebSocket *text* frame. The first client frame is a
6//! [`ClientFrame::Hello`] pairing handshake; once the server replies
7//! [`ServerFrame::Paired`], application traffic is carried as Ed25519-signed
8//! [`SignedEnvelope`]s whose `payload` is the canonical JSON of an A2A
9//! `JsonRpcRequest` — the same crypto MUR uses for agent↔agent bridge traffic.
10//!
11//! P3 adds a voice streaming path: the phone streams raw 16 kHz mono f32 PCM
12//! chunks, the Mac runs whisper.cpp for an authoritative transcript, then
13//! replies with Kokoro TTS audio chunks.
14//!
15//! Design: `docs/superpowers/specs/2026-06-05-mur-voice-mobile-app-design.md`.
16
17use crate::bridge::envelope::SignedEnvelope;
18use serde::{Deserialize, Serialize};
19
/// URL path under which the daemon exposes its mobile WebSocket endpoint.
pub const MOBILE_WS_PATH: &str = "/api/v1/mobile/ws";
22
23/// Frames the phone sends to the Mac endpoint.
24#[derive(Debug, Serialize, Deserialize)]
25#[serde(tag = "type", rename_all = "snake_case")]
26pub enum ClientFrame {
27 /// Pairing / auth handshake. `token` is the one-time token from the QR
28 /// code; `pubkey` is the phone's multibase Ed25519 public key, which the
29 /// Mac records as a paired device on success. `agent` is the canonical
30 /// agent name the phone wants to talk to (e.g. `"mur"`).
31 Hello {
32 pubkey: String,
33 token: String,
34 agent: String,
35 },
36 /// A signed A2A request destined for the agent (text-only path).
37 Envelope { envelope: SignedEnvelope },
38 /// Phone begins a voice utterance. The Mac clears its audio accumulator and
39 /// prepares for incoming chunks at `sample_rate` Hz, mono f32.
40 AudioStreamStart { sample_rate: u32 },
41 /// One chunk of raw PCM (f32 LE, `sample_rate` Hz mono, standard base64).
42 /// Authenticated by the connection (paired at `Hello`); no per-chunk sig.
43 AudioChunk { data: String },
44 /// Phone finished speaking. Mac should run STT on the accumulated audio,
45 /// forward to the agent, and stream TTS audio back.
46 AudioStreamEnd,
47}
48
49/// Frames the Mac endpoint sends back to the phone.
50#[derive(Debug, Serialize, Deserialize)]
51#[serde(tag = "type", rename_all = "snake_case")]
52pub enum ServerFrame {
53 /// Handshake accepted; the phone is now paired with `agent`.
54 Paired { agent: String },
55 /// The handshake or a later frame was rejected.
56 Rejected { reason: String },
57 /// An asynchronous event mirrored to the phone. `name` is dot-namespaced
58 /// (`mobile.transcript`, `mobile.reply`, …) to match the Hub `EventBus`
59 /// names used for desktop mirroring.
60 Event {
61 name: String,
62 payload: serde_json::Value,
63 },
64 /// Mac whisper.cpp authoritative transcript for the user's just-spoken
65 /// utterance. Overrides the on-device SFSpeech partial. `is_final: true`
66 /// means this is the definitive text for this turn.
67 Transcript { text: String, is_final: bool },
68 /// A chunk of Kokoro TTS audio (f32 LE PCM, 24 kHz mono). The phone
69 /// accumulates chunks until `done: true`, then plays them back.
70 /// `base64` is the standard base64 encoding of the raw bytes.
71 AudioChunk {
72 base64: String,
73 sample_rate: u32,
74 done: bool,
75 },
76}