Skip to main content

ping_core/
client.rs

1//! `MessagingClient` — top-level handle. Owns the OpenMLS provider, identity, local device,
2//! and the set of open conversations.
3//!
4//! All operations are `async`. The intent is that the FFI generators emit Swift `async`,
5//! Kotlin `suspend`, and the WASM glue exposes Promises.
6
7use openmls::framing::MlsMessageOut;
8use openmls::prelude::{
9    tls_codec::Serialize as TlsSerialize, BasicCredential, Ciphersuite, CredentialWithKey,
10    KeyPackageBuilder,
11};
12use openmls_basic_credential::SignatureKeyPair;
13use openmls_traits::OpenMlsProvider;
14use parking_lot::RwLock;
15use ping_mls_store::{PersistentMlsProvider, StorageBackend};
16use std::collections::HashMap;
17use std::sync::Arc;
18use zeroize::Zeroizing;
19
20use crate::{
21    codec,
22    conversation::{Conversation, ConversationId, ConversationMeta, MemberInfo},
23    device::{
24        CatchupAppEventEntry, CatchupConversationEntry, CatchupSnapshot, DeviceId, DeviceInfo,
25        LinkingTicket, LocalDevice, CATCHUP_SNAPSHOT_VERSION,
26    },
27    error::{Error, Result},
28    identity::{Identity, UserId},
29    message::{IncomingMessage, MessageEnvelope, MessageKind},
30    storage::Storage,
31    sync::SyncCursor,
32    transport::Transport,
33};
34
/// Ciphersuite used by this module wherever one is required: it selects the signature
/// algorithm for the MLS signing key pair built in `MessagingClient::init` and is passed
/// to `KeyPackageBuilder::build` in `MessagingClient::fresh_key_package`.
const DEFAULT_CIPHERSUITE: Ciphersuite = Ciphersuite::MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519;
36
/// Per-chat result reported by [`MessagingClient::admit_device_to_chats`].
///
/// One value is produced for every `(conversation_id, key_package)` pair passed in,
/// in input order.
#[derive(Debug, Clone)]
pub struct AdmitChatOutcome {
    /// The conversation this outcome refers to.
    pub conversation_id: ConversationId,
    /// What happened when admitting the device to that conversation.
    pub status: AdmitChatStatus,
}
43
/// Outcome category for a single chat in [`MessagingClient::admit_device_to_chats`].
#[derive(Debug, Clone)]
pub enum AdmitChatStatus {
    /// The new device is now an MLS leaf in this chat. Both the Commit
    /// and the addressed Welcome have been sent.
    Admitted,
    /// We chose not to admit (e.g. the conversation is a DeviceGroup,
    /// which was already handled at linking-ticket build time).
    Skipped { reason: String },
    /// MLS or transport rejected the admission. `error` is the underlying
    /// message — typically a `transport error: ...` or an OpenMLS error.
    /// Also reported when the Commit and Welcome were sent but persisting
    /// the conversation snapshot failed (`error` then starts with `snapshot:`).
    Failed { error: String },
}
56
57#[derive(Debug)]
58pub struct ClientConfig {
59    pub identity: Identity,
60    pub device_label: String,
61    pub storage: Arc<dyn Storage>,
62    pub transport: Arc<dyn Transport>,
63    /// Wall clock in ms. Pulled from the host so we can use a synthetic clock in tests.
64    pub now_ms: u64,
65    /// [CR-4] OpenMLS-provider backend. Defaults to in-memory; iOS NSE and web SW
66    /// cold-start paths MUST pass `StorageBackend::Sqlite { path, encryption_key }`
67    /// (native) or `StorageBackend::IndexedDb { db_name }` (WASM, when that lands).
68    /// See `docs/design/CR4_CR7_PERSISTENCE.md`.
69    pub storage_backend: StorageBackend,
70    /// Optional 32-byte Ed25519 secret key the SDK should use as the
71    /// device signing key. When set AND no `LocalDevice` is yet
72    /// persisted in `storage`, the SDK constructs its first
73    /// `LocalDevice` from this key instead of generating a fresh
74    /// random one — so `device_id = SHA-256(public_key_of(secret))`
75    /// is fully determined by what the host provided.
76    ///
77    /// Use case: align the SDK's `device_id` (which it stamps into
78    /// every envelope's `sender_device` field) with an externally-
79    /// computed device id — typically `SHA-256(device_signing_pubkey)`
80    /// in the host's auth layer, where the JWT carries that same
81    /// value as its `device_id` claim. Without this alignment, a
82    /// server that validates `envelope.sender_device ==
83    /// jwt.device_id` would reject every send.
84    ///
85    /// Ignored on re-init (when storage already has a persisted
86    /// `LocalDevice`) so the device identity remains stable across
87    /// restarts.
88    pub device_signing_secret_key: Option<[u8; 32]>,
89}
90
91impl ClientConfig {
92    /// Construct a config with `StorageBackend::Memory` — convenient for tests and
93    /// the existing v0.1 in-memory flow.
94    pub fn new_in_memory(
95        identity: Identity,
96        device_label: String,
97        storage: Arc<dyn Storage>,
98        transport: Arc<dyn Transport>,
99        now_ms: u64,
100    ) -> Self {
101        Self {
102            identity,
103            device_label,
104            storage,
105            transport,
106            now_ms,
107            storage_backend: StorageBackend::Memory,
108            device_signing_secret_key: None,
109        }
110    }
111}
112
/// Top-level client handle. Constructed by [`MessagingClient::init`], which returns it
/// wrapped in an `Arc`.
pub struct MessagingClient {
    /// User identity taken from `ClientConfig::identity`.
    pub(crate) identity: Identity,
    /// This device's record — loaded from storage or created on first init.
    pub(crate) local_device: LocalDevice,
    /// OpenMLS provider opened for the configured `StorageBackend`.
    pub(crate) crypto: Arc<PersistentMlsProvider>,
    /// MLS signature key pair, built in `init` from `local_device.signing`.
    pub(crate) signing: Arc<SignatureKeyPair>,
    /// Host-side key/value storage.
    pub(crate) storage: Arc<dyn Storage>,
    /// Host-side transport.
    pub(crate) transport: Arc<dyn Transport>,
    /// Open conversations keyed by id, guarded by a `parking_lot::RwLock`.
    conversations: RwLock<HashMap<ConversationId, Conversation>>,
}
122
123impl std::fmt::Debug for MessagingClient {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        f.debug_struct("MessagingClient")
126            .field("user_id", &self.identity.user_id().as_hex())
127            .field("device_id", &self.local_device.device_id.as_hex())
128            .field("conversation_count", &self.conversations.read().len())
129            .finish()
130    }
131}
132
133impl MessagingClient {
134    /// Initialise. Creates a new local device if none is recorded in storage; otherwise rehydrates.
135    pub async fn init(cfg: ClientConfig) -> Result<Arc<Self>> {
136        // [CR-4] OpenMLS provider is now pluggable. For `StorageBackend::Memory` this
137        // behaves like the old `OpenMlsRustCrypto::default()`. For `Sqlite`, the
138        // working set is hydrated from the on-disk blob; subsequent `checkpoint` calls
139        // flush it back. iOS NSE / web SW cold-start lives here.
140        //
141        // Use `open_async` so the WASM `StorageBackend::IndexedDb` variant can read
142        // its snapshot blob through the host-supplied `AsyncBlobStore` before
143        // returning — without this, the provider's `MemoryStorage` would be empty
144        // and `MlsGroup::load` would silently return `None` for every group on
145        // cold restart, breaking chat persistence across reloads. Native targets
146        // (Memory + Sqlite) delegate to the sync path under the hood, so the
147        // `.await` is free there.
148        let crypto = PersistentMlsProvider::open_async(cfg.storage_backend.clone())
149            .await
150            .map_err(|e| Error::Storage(format!("provider open: {e}")))?;
151        let local_device = match cfg.storage.get("device", "local").await? {
152            Some(bytes) => decode_local_device(&bytes, cfg.identity.user_id().clone())?,
153            None => {
154                // First-init path. If the host supplied a signing secret
155                // (typically to align the device_id with their auth
156                // layer), use it; otherwise mint a fresh random key.
157                // Either way, the constructed `LocalDevice` is
158                // immediately persisted so future inits load from
159                // storage without consulting the override again.
160                let dev = match cfg.device_signing_secret_key.as_ref() {
161                    Some(secret) => LocalDevice::from_signing_secret(
162                        cfg.identity.user_id().clone(),
163                        cfg.device_label,
164                        cfg.now_ms,
165                        secret,
166                    ),
167                    None => LocalDevice::generate(
168                        cfg.identity.user_id().clone(),
169                        cfg.device_label,
170                        cfg.now_ms,
171                    ),
172                };
173                let bytes = encode_local_device(&dev)?;
174                cfg.storage.put("device", "local", bytes).await?;
175                dev
176            }
177        };
178
179        // [CR-4] MLS signing keypair MUST be stable across cold restarts — otherwise the
180        // leaf-key stored on disk no longer matches the per-client key on re-init, and any
181        // send-after-restart silently misroutes. We derive deterministically from the
182        // already-persistent `LocalDevice::signing` (Ed25519, 32 raw bytes), and the
183        // ciphersuite's signature scheme is Ed25519 too — so the device signing key and the
184        // MLS leaf signing key are the same bytes. The MLS storage provider also receives
185        // a copy via `store()` so OpenMLS-internal lookups (process_message, etc.) succeed.
186        let signing = {
187            let sk_bytes = local_device.signing.to_bytes().to_vec();
188            let pk_bytes = local_device.signing.verifying_key().to_bytes().to_vec();
189            let kp = SignatureKeyPair::from_raw(
190                DEFAULT_CIPHERSUITE.signature_algorithm(),
191                sk_bytes,
192                pk_bytes,
193            );
194            kp.store(crypto.storage()).map_err(Error::mls)?;
195            Arc::new(kp)
196        };
197
198        let client = Arc::new(Self {
199            identity: cfg.identity,
200            local_device,
201            crypto,
202            signing,
203            storage: cfg.storage,
204            transport: cfg.transport,
205            conversations: RwLock::new(HashMap::new()),
206        });
207
208        client.rehydrate_conversations(cfg.now_ms).await?;
209
210        // [CR-10] Ensure the DeviceGroup exists at init, not lazily inside
211        // build_linking_ticket. Single-device users need somewhere to write
212        // personal events (drafts, read pointers, notes, vault wrapper)
213        // even before they pair a second device. Lazy creation in
214        // build_linking_ticket left them with no DG → no place for
215        // personal state to land.
216        //
217        // Idempotent — re-init after a cold restart finds the DG via
218        // rehydrate_conversations and this becomes a no-op.
219        client.ensure_device_group(cfg.now_ms).await?;
220
221        Ok(client)
222    }
223
224    /// [CR-10] Idempotently ensures this user's DeviceGroup exists in
225    /// `self.conversations`. Called from `init` (so single-device users
226    /// have a DG immediately) and from `build_linking_ticket` (the legacy
227    /// lazy path; still safe to call when the DG already exists, since
228    /// rehydrate_conversations would have re-attached it before init
229    /// returned).
230    ///
231    /// The DeviceGroup is a one-leaf MLS group at creation time —
232    /// `add_members` (called by `build_linking_ticket` when a second
233    /// device pairs in) is what grows it. We persist the snapshot so a
234    /// cold restart picks it up before this function runs again.
235    pub(crate) async fn ensure_device_group(self: &Arc<Self>, now_ms: u64) -> Result<()> {
236        let dg_id = device_group_id_for(self.identity.user_id());
237        if self.conversations.read().contains_key(&dg_id) {
238            return Ok(());
239        }
240        let mut new_dg = Conversation::create(
241            dg_id,
242            Some("device-group".into()),
243            self.local_device.device_id.clone(),
244            self.identity.user_id(),
245            self.crypto.clone(),
246            self.signing.clone(),
247            self.storage.clone(),
248            now_ms,
249        )?;
250        new_dg.meta.is_device_group = true;
251        new_dg.snapshot_to_storage().await?;
252        self.conversations.write().insert(dg_id, new_dg);
253        Ok(())
254    }
255
    /// Returns an owned copy of the user id held by this client's identity.
    pub fn user_id(&self) -> UserId {
        self.identity.user_id().clone()
    }
    /// Returns an owned copy of the local device's id.
    pub fn device_id(&self) -> DeviceId {
        self.local_device.device_id.clone()
    }
    /// Returns the local device's `DeviceInfo`, as produced by `LocalDevice::info`
    /// for the given `now_ms`.
    pub fn device_info(&self, now_ms: u64) -> DeviceInfo {
        self.local_device.info(now_ms)
    }
265
266    /// Generate a fresh KeyPackage to publish to the directory. Hosts call this when registering
267    /// a device or topping up the directory.
268    pub fn fresh_key_package(&self) -> Result<Vec<u8>> {
269        let credential_with_key = CredentialWithKey {
270            credential: BasicCredential::new(self.identity.user_id().0.clone()).into(),
271            signature_key: self.signing.public().to_vec().into(),
272        };
273        let bundle = KeyPackageBuilder::new()
274            .build(
275                DEFAULT_CIPHERSUITE,
276                self.crypto.as_ref(),
277                self.signing.as_ref(),
278                credential_with_key,
279            )
280            .map_err(Error::mls)?;
281        // KeyPackages are serialized as MlsMessage(KeyPackage) per the MLS framing spec.
282        let msg: MlsMessageOut = bundle.key_package().clone().into();
283        msg.tls_serialize_detached().map_err(Error::mls)
284    }
285
286    /// Create a new conversation owned by this client (and seeded with a single member: this device).
287    pub async fn create_conversation(
288        self: &Arc<Self>,
289        name: Option<String>,
290        now_ms: u64,
291    ) -> Result<ConversationId> {
292        let id = ConversationId::new();
293        let convo = Conversation::create(
294            id,
295            name,
296            self.local_device.device_id.clone(),
297            self.identity.user_id(),
298            self.crypto.clone(),
299            self.signing.clone(),
300            self.storage.clone(),
301            now_ms,
302        )?;
303        convo.snapshot_to_storage().await?;
304        self.conversations.write().insert(id, convo);
305        Ok(id)
306    }
307
308    /// Join via a Welcome bundled in a [`MessageEnvelope`] of kind `Welcome`.
309    pub async fn join_conversation(
310        self: &Arc<Self>,
311        welcome_envelope: &MessageEnvelope,
312        now_ms: u64,
313    ) -> Result<ConversationId> {
314        if welcome_envelope.kind != MessageKind::Welcome {
315            return Err(Error::Invalid("expected Welcome envelope".into()));
316        }
317        let convo = Conversation::join(
318            &welcome_envelope.payload,
319            self.local_device.device_id.clone(),
320            self.crypto.clone(),
321            self.signing.clone(),
322            self.storage.clone(),
323            now_ms,
324        )?;
325        let id = convo.id();
326        convo.snapshot_to_storage().await?;
327        self.conversations.write().insert(id, convo);
328        Ok(id)
329    }
330
331    pub fn list_conversations(&self) -> Vec<ConversationMeta> {
332        self.conversations
333            .read()
334            .values()
335            .map(|c| c.meta.clone())
336            .collect()
337    }
338
339    /// Member roster for a conversation, recovered locally from the MLS
340    /// group's leaf credentials. Empty if the conversation is unknown to
341    /// this client. Lets any device (including one that just joined via a
342    /// linking Welcome) resolve a 1:1 peer's `UserId` without the
343    /// out-of-band `ping.profile` re-send.
344    pub fn members(&self, conv_id: ConversationId) -> Vec<MemberInfo> {
345        self.conversations
346            .read()
347            .get(&conv_id)
348            .map(|c| c.members())
349            .unwrap_or_default()
350    }
351
352    /// Send an application message. Returns once the envelope has been handed to the transport.
353    #[allow(clippy::await_holding_lock)] // see add_members for rationale
354    pub async fn send(
355        &self,
356        conv_id: ConversationId,
357        plaintext: Vec<u8>,
358        now_ms: u64,
359    ) -> Result<MessageEnvelope> {
360        let envelope = {
361            let mut guard = self.conversations.write();
362            let convo = guard
363                .get_mut(&conv_id)
364                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
365            convo.send_application(&plaintext, now_ms)?
366        };
367        self.transport.send(envelope.clone()).await?;
368        // The OpenMLS sender ratchet advances on every Application message — `seq` + `hlc`
369        // are bumped on the conversation, and the underlying group keystore stores new
370        // generation keys. Without a checkpoint here, a reload rolls back to the pre-send
371        // state and the next send re-uses an already-consumed generation that receivers
372        // silently drop. Mirrors the snapshot calls after every Commit/Welcome op.
373        if let Some(c) = self.conversations.read().get(&conv_id) {
374            c.snapshot_to_storage().await?;
375        }
376        Ok(envelope)
377    }
378
379    /// Add members. The Commit goes on the wire; the Welcome should be delivered to the new
380    /// devices' inboxes (the host transport implements that — typically as a separate addressed
381    /// envelope).
382    ///
383    /// [CR-2] Each entry is `(DeviceId, KeyPackage_bytes)`. The host typically gets the
384    /// device_id from the directory at the same time it gets the KeyPackage; we use it to
385    /// record a per-conversation `device_id → leaf_index` map so [`Self::revoke_device`]
386    /// can later locate the leaf without a fresh directory lookup. The SDK does not
387    /// cryptographically verify the host's device-id claim — that's a directory policy
388    /// concern.
389    //
390    // We hold a `parking_lot` read guard across `.await` for `snapshot_to_storage` here. Clippy
391    // flags this; we keep it for v0.1 because the alternative is a structural refactor of
392    // Conversation::snapshot_to_storage to split sync prep from async writes — see
393    // docs/ASSUMPTIONS.md item "lock-during-async-I/O is suboptimal but acceptable for v0.1".
394    // The `parking_lot/send_guard` feature (in core/Cargo.toml) makes the guard `Send` so the
395    // future is still schedulable across tokio threads.
396    #[allow(clippy::await_holding_lock)]
397    pub async fn add_members(
398        &self,
399        conv_id: ConversationId,
400        entries: Vec<(DeviceId, Vec<u8>)>,
401        now_ms: u64,
402    ) -> Result<()> {
403        let outcome = {
404            let mut guard = self.conversations.write();
405            let convo = guard
406                .get_mut(&conv_id)
407                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
408            convo.add_members(entries, now_ms)?
409        };
410        self.transport.send(outcome.commit).await?;
411        self.transport.send(outcome.welcome).await?;
412        if let Some(c) = self.conversations.read().get(&conv_id) {
413            c.snapshot_to_storage().await?;
414        }
415        Ok(())
416    }
417
418    /// Admits `new_device_id` to every conversation in `kps_per_chat` via
419    /// the standard MLS `add_members` flow — one Commit + one Welcome per
420    /// chat. This is the SDK-side replacement for the host's previous
421    /// per-chat reconciler loop after device linking; centralising it
422    /// here means iOS/Android/web hosts all share the orchestration and
423    /// the transport's Welcome-recipient priming is automatic.
424    ///
425    /// Inputs:
426    /// - `new_device_id`: the device being admitted (matches the
427    ///   `device_binding_sig` recipient in the linking ticket).
428    /// - `kps_per_chat`: one freshly-claimed KeyPackage per chat. The
429    ///   host claims these via the auth-layer's per-account KP pool
430    ///   (`GET /v1/devices/{accountId}`) AFTER the new device's
431    ///   bootstrap has uploaded its KP batch.
432    /// - `now_ms`: wall-clock used to stamp HLCs on the emitted
433    ///   envelopes.
434    ///
435    /// Per-chat failures (unknown conversation, MLS error, transport
436    /// error, etc.) are CAPTURED in the returned vec rather than
437    /// short-circuiting the whole call — losing one chat shouldn't
438    /// strand the new device on every other chat. The caller decides
439    /// whether to retry the failed entries (e.g. with a fresh KP).
440    #[allow(clippy::await_holding_lock)] // see add_members for rationale
441    pub async fn admit_device_to_chats(
442        &self,
443        new_device_id: DeviceId,
444        kps_per_chat: Vec<(ConversationId, Vec<u8>)>,
445        now_ms: u64,
446    ) -> Result<Vec<AdmitChatOutcome>> {
447        let mut outcomes = Vec::with_capacity(kps_per_chat.len());
448        for (conv_id, kp_bytes) in kps_per_chat {
449            // Belt-and-braces: skip the DeviceGroup. The DG was already
450            // welcomed via the linking ticket — re-adding the new
451            // device there would produce a duplicate-add Commit that
452            // BE de-dups, but the noise is avoidable.
453            let is_dg = self
454                .conversations
455                .read()
456                .get(&conv_id)
457                .map(|c| c.meta().is_device_group)
458                .unwrap_or(false);
459            if is_dg {
460                outcomes.push(AdmitChatOutcome {
461                    conversation_id: conv_id,
462                    status: AdmitChatStatus::Skipped {
463                        reason: "device_group".to_string(),
464                    },
465                });
466                continue;
467            }
468
469            // Prime the host transport with the welcome recipient BEFORE
470            // we mutate MLS state. If priming fails (non-web hosts use
471            // the default no-op), continue — the host's transport will
472            // either route some other way or surface a 4xx on the
473            // welcome send and we'll catch it below.
474            let _ = self
475                .transport
476                .set_next_welcome_recipients(conv_id, vec![new_device_id.clone()])
477                .await;
478
479            let entry = (new_device_id.clone(), kp_bytes);
480            let outcome_result = {
481                let mut guard = self.conversations.write();
482                match guard.get_mut(&conv_id) {
483                    Some(convo) => convo.add_members(vec![entry], now_ms),
484                    None => Err(Error::UnknownConversation(conv_id.as_hex())),
485                }
486            };
487
488            let outcome = match outcome_result {
489                Ok(o) => o,
490                Err(e) => {
491                    outcomes.push(AdmitChatOutcome {
492                        conversation_id: conv_id,
493                        status: AdmitChatStatus::Failed {
494                            error: e.to_string(),
495                        },
496                    });
497                    continue;
498                }
499            };
500
501            if let Err(e) = self.transport.send(outcome.commit).await {
502                outcomes.push(AdmitChatOutcome {
503                    conversation_id: conv_id,
504                    status: AdmitChatStatus::Failed {
505                        error: format!("commit send: {e}"),
506                    },
507                });
508                continue;
509            }
510            if let Err(e) = self.transport.send(outcome.welcome).await {
511                outcomes.push(AdmitChatOutcome {
512                    conversation_id: conv_id,
513                    status: AdmitChatStatus::Failed {
514                        error: format!("welcome send: {e}"),
515                    },
516                });
517                continue;
518            }
519
520            if let Some(c) = self.conversations.read().get(&conv_id) {
521                if let Err(e) = c.snapshot_to_storage().await {
522                    // Snapshot failure is non-fatal for the join — the
523                    // MLS adds already shipped — but record it so the
524                    // host can decide whether to retry. The next
525                    // successful send/process will re-snapshot anyway.
526                    outcomes.push(AdmitChatOutcome {
527                        conversation_id: conv_id,
528                        status: AdmitChatStatus::Failed {
529                            error: format!("snapshot: {e}"),
530                        },
531                    });
532                    continue;
533                }
534            }
535
536            outcomes.push(AdmitChatOutcome {
537                conversation_id: conv_id,
538                status: AdmitChatStatus::Admitted,
539            });
540        }
541        Ok(outcomes)
542    }
543
544    #[allow(clippy::await_holding_lock)] // see add_members for rationale
545    pub async fn remove_members(
546        &self,
547        conv_id: ConversationId,
548        leaf_indexes: Vec<u32>,
549        now_ms: u64,
550    ) -> Result<()> {
551        let envelope = {
552            let mut guard = self.conversations.write();
553            let convo = guard
554                .get_mut(&conv_id)
555                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
556            convo.remove_members(leaf_indexes, now_ms)?
557        };
558        self.transport.send(envelope).await?;
559        if let Some(c) = self.conversations.read().get(&conv_id) {
560            c.snapshot_to_storage().await?;
561        }
562        Ok(())
563    }
564
565    /// Process an inbound envelope coming from the transport's subscribe callback or a sync pull.
566    /// Returns `Some` for application traffic, `None` for handshake messages (already merged).
567    #[allow(clippy::await_holding_lock)] // see add_members for rationale
568    pub async fn process_envelope(
569        &self,
570        env: &MessageEnvelope,
571        now_ms: u64,
572    ) -> Result<Option<IncomingMessage>> {
573        // Welcome envelopes for unknown conversations are routed to `join_conversation` by the
574        // caller. Here we only handle traffic for already-open groups.
575        let mut guard = self.conversations.write();
576        let convo = match guard.get_mut(&env.conversation_id) {
577            Some(c) => c,
578            None => return Err(Error::UnknownConversation(env.conversation_id.as_hex())),
579        };
580        let out = convo.process(env, now_ms)?;
581        // Cheap snapshot — only mutates KV the size of the cursor.
582        convo.snapshot_to_storage().await?;
583        Ok(out)
584    }
585
586    /// Catch-up sync: pull missing events for every open conversation since its cursor.
587    /// Returns the list of newly-decrypted application messages, in apply order.
588    pub async fn sync_conversations(&self, now_ms: u64) -> Result<Vec<IncomingMessage>> {
589        let pending: Vec<(ConversationId, SyncCursor)> = self
590            .conversations
591            .read()
592            .iter()
593            .map(|(id, c)| (*id, c.cursor.clone()))
594            .collect();
595
596        let mut delivered = Vec::new();
597        for (conv_id, cursor) in pending {
598            loop {
599                let batch = self
600                    .transport
601                    .fetch_since(conv_id, cursor.clone(), 256)
602                    .await?;
603                if batch.is_empty() {
604                    break;
605                }
606                for env in &batch {
607                    if let Some(msg) = self.process_envelope(env, now_ms).await? {
608                        delivered.push(msg);
609                    }
610                }
611                if batch.len() < 256 {
612                    break;
613                } // partial page → caught up
614            }
615        }
616        Ok(delivered)
617    }
618
619    /// Rehydrate conversations from storage on startup ([CR-4]).
620    ///
621    /// Walks the host-side `groups` namespace for meta records, pairs each with its
622    /// cursor + device→leaf map, and asks `Conversation::load` to re-attach to the
623    /// underlying OpenMLS group state. The MLS state itself was persisted by the
624    /// SQLite-backed `PersistentMlsProvider` on the previous run; this method
625    /// reconciles the SDK-side caches with what's on disk.
626    async fn rehydrate_conversations(self: &Arc<Self>, now_ms: u64) -> Result<()> {
627        let metas = self.storage.list_keys("groups", "").await?;
628        for path in metas {
629            // path looks like "{convId}/meta"
630            let Some((id_hex, suffix)) = path.split_once('/') else {
631                continue;
632            };
633            if suffix != "meta" {
634                continue;
635            }
636            let Some(meta_bytes) = self.storage.get("groups", &path).await? else {
637                continue;
638            };
639            let meta: ConversationMeta = match codec::decode(&meta_bytes) {
640                Ok(m) => m,
641                Err(_) => continue,
642            };
643            let cursor_bytes = self
644                .storage
645                .get("cursors", id_hex)
646                .await?
647                .unwrap_or_default();
648            let cursor = if cursor_bytes.is_empty() {
649                SyncCursor::default()
650            } else {
651                SyncCursor::decode(&cursor_bytes).unwrap_or_default()
652            };
653
654            // [CR-2] device→leaf map was persisted alongside meta + cursor.
655            let device_leaves_bytes = self
656                .storage
657                .get("device_leaves", id_hex)
658                .await?
659                .unwrap_or_default();
660            let device_leaves: std::collections::BTreeMap<DeviceId, u32> =
661                if device_leaves_bytes.is_empty() {
662                    std::collections::BTreeMap::new()
663                } else {
664                    let pairs: Vec<(DeviceId, u32)> =
665                        codec::decode(&device_leaves_bytes).unwrap_or_default();
666                    pairs.into_iter().collect()
667                };
668
669            match Conversation::load(
670                meta.id,
671                meta.clone(),
672                cursor,
673                device_leaves,
674                self.local_device.device_id.clone(),
675                self.crypto.clone(),
676                self.signing.clone(),
677                self.storage.clone(),
678                now_ms,
679            ) {
680                Ok(Some(convo)) => {
681                    tracing::debug!(
682                        target: "ping_core::client",
683                        convo = %id_hex,
684                        epoch = meta.epoch,
685                        "rehydrated conversation from disk"
686                    );
687                    self.conversations.write().insert(meta.id, convo);
688                }
689                Ok(None) => {
690                    tracing::warn!(
691                        target: "ping_core::client",
692                        convo = %id_hex,
693                        "host-side meta present but OpenMLS state missing — skipping"
694                    );
695                }
696                Err(e) => {
697                    tracing::warn!(
698                        target: "ping_core::client",
699                        convo = %id_hex,
700                        error = %e,
701                        "Conversation::load failed — skipping"
702                    );
703                }
704            }
705        }
706        Ok(())
707    }
708
709    // ------------------- Multi-device API -------------------
710
711    /// Build a [`LinkingTicket`] for a new device. The caller obtains `new_device_kp` from the
712    /// new device (e.g., via QR-encoded handshake) and is responsible for sealing the returned
713    /// ticket against the new device's ephemeral X25519 pubkey before transmission via
714    /// [`ping_link::seal_ticket`].
715    ///
716    /// [CR-13] `last_app_events` is a host-supplied list of `(conversation_id, app_event_bytes)`
717    /// for the new device's "what you missed" UI. The SDK adds its own metas + (currently-
718    /// empty) per-conversation MLS state and bundles everything into
719    /// [`device::CatchupSnapshot`], CBOR-encoded into the ticket's `catchup_snapshot` field.
720    /// Pass an empty `Vec` to suppress catchup data (the new device sees an empty
721    /// conversation list until normal sync runs).
722    pub async fn build_linking_ticket(
723        self: &Arc<Self>,
724        new_device_id: DeviceId,
725        new_device_kp: Vec<u8>,
726        last_app_events: Vec<(ConversationId, Vec<u8>)>,
727        now_ms: u64,
728    ) -> Result<LinkingTicket> {
729        let device_binding_sig = self.identity.sign_device_binding(&new_device_id.0);
730        let dg_id = device_group_id_for(self.identity.user_id());
731
732        // [CR-10] DG is eagerly created at init now, but call ensure here too so
733        // hosts that bypass `MessagingClient::init` (mocked tests, legacy upgrade
734        // paths) keep working.
735        self.ensure_device_group(now_ms).await?;
736
737        // Admit the new device to the DeviceGroup.
738        let outcome = {
739            let mut conversations = self.conversations.write();
740            let dg = conversations
741                .get_mut(&dg_id)
742                .expect("DeviceGroup ensured above");
743            // [CR-2] Record the new device's leaf in the DG so future `revoke_device`
744            // can find it. The new_device_id we got as a parameter is the inviter's
745            // own assertion — same trust model as the rest of `add_members`.
746            dg.add_members(vec![(new_device_id.clone(), new_device_kp)], now_ms)?
747        };
748
749        // [CR-13] Assemble the catchup snapshot: SDK-known conversation metadata + host-
750        // supplied last-known plaintext per conversation. [CR-7] now populates
751        // `group_state_bytes` with each group's MLS state so the new device can decrypt
752        // historical traffic without re-Welcoming. An empty `group_state_bytes` would
753        // mean either a group with no exportable state (shouldn't happen) or an
754        // encoder failure (we let those propagate as errors below).
755        let catchup_snapshot = if last_app_events.is_empty() && self.conversations.read().is_empty()
756        {
757            // Cheap path: nothing to snapshot, skip the encode round-trip.
758            Vec::new()
759        } else {
760            let conversation_metas: Vec<CatchupConversationEntry> = self
761                .conversations
762                .read()
763                .values()
764                .map(|c| -> Result<CatchupConversationEntry> {
765                    // CR-7: per-group state. We deliberately keep the export bytes
766                    // inside the (HPKE-sealed-by-CR-3) LinkingTicket; the receiver
767                    // calls `import_state_snapshot` with these bytes after `consume_linking_ticket`.
768                    let group_bytes = c.export_state_snapshot(now_ms)?.to_vec();
769                    Ok(CatchupConversationEntry {
770                        conversation_id: c.id(),
771                        meta: c.meta().clone(),
772                        group_state_bytes: group_bytes,
773                    })
774                })
775                .collect::<Result<_>>()?;
776            let last_app_events_per_conv: Vec<CatchupAppEventEntry> = last_app_events
777                .into_iter()
778                .map(|(conversation_id, app_event_bytes)| CatchupAppEventEntry {
779                    conversation_id,
780                    app_event_bytes,
781                })
782                .collect();
783            CatchupSnapshot {
784                v: CATCHUP_SNAPSHOT_VERSION,
785                conversation_metas,
786                last_app_events_per_conv,
787            }
788            .encode()?
789        };
790
791        Ok(LinkingTicket {
792            v: 1,
793            user_id: self.identity.user_id().clone(),
794            user_pubkey: self.identity.public_key().to_bytes().to_vec(),
795            new_device_id,
796            device_binding_sig,
797            device_group_welcome: outcome.welcome.payload,
798            catchup_snapshot,
799        })
800    }
801
802    /// Apply a received linking ticket. Joins the user's DeviceGroup; the catch-up snapshot
803    /// (if any) is decrypted by the host using the standard per-conversation channel afterwards.
804    pub async fn consume_linking_ticket(
805        self: &Arc<Self>,
806        ticket: &LinkingTicket,
807        now_ms: u64,
808    ) -> Result<()> {
809        // Verify the binding the existing device made for us. (Ed25519 public keys are 32 bytes.)
810        let pk_bytes: [u8; 32] = ticket
811            .user_pubkey
812            .as_slice()
813            .try_into()
814            .map_err(|_| Error::Identity("user_pubkey must be 32 bytes".into()))?;
815        let user_pk = ed25519_dalek::VerifyingKey::from_bytes(&pk_bytes)
816            .map_err(|e| Error::Identity(format!("bad user pubkey: {e}")))?;
817        Identity::verify_device_binding(
818            &user_pk,
819            &ticket.user_id,
820            &ticket.new_device_id.0,
821            &ticket.device_binding_sig,
822        )?;
823        if ticket.new_device_id != self.local_device.device_id {
824            return Err(Error::Invalid(
825                "ticket addressed to a different device".into(),
826            ));
827        }
828
829        let dummy_env = MessageEnvelope::new(
830            ConversationId(device_group_id_for(&ticket.user_id).0),
831            0,
832            MessageKind::Welcome,
833            self.local_device.device_id.clone(),
834            0,
835            crate::clock::Hlc::ZERO,
836            ticket.device_group_welcome.clone(),
837        );
838        self.join_conversation(&dummy_env, now_ms).await?;
839        Ok(())
840    }
841
842    /// [CR-7] Export the MLS state snapshot for one open conversation.
843    ///
844    /// Thin pass-through to [`Conversation::export_state_snapshot`]. Returned bytes
845    /// are wrapped in `Zeroizing` because they contain past epoch secrets.
846    pub fn export_conversation_state_snapshot(
847        &self,
848        conv_id: ConversationId,
849        now_ms: u64,
850    ) -> Result<zeroize::Zeroizing<Vec<u8>>> {
851        let guard = self.conversations.read();
852        let convo = guard
853            .get(&conv_id)
854            .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
855        convo.export_state_snapshot(now_ms)
856    }
857
858    /// [CR-7] Import a `GroupStateSnapshot` produced by another device's
859    /// [`Conversation::export_state_snapshot`].
860    ///
861    /// Replays the snapshot's entries into this client's OpenMLS provider, then
862    /// reconstructs the `Conversation` handle via `MlsGroup::load`. After return,
863    /// the conversation is in `list_conversations()` and `send`/`process_envelope`
864    /// work against it normally.
865    ///
866    /// **Scope.** This is for the *same-user* hand-off (linking, recovery). The
867    /// snapshot exposes the exporter's view of past epoch secrets for the target
868    /// group; only call this when the receiving device has been authenticated to
869    /// the same user identity (mnemonic, QR-handshake). Cross-user history transfer
870    /// uses HPKE-sealed AppEvent re-shares (umbrella §15.6), not this method.
871    ///
872    /// **Sanity.** Refuses snapshots whose `group_id` doesn't match the bytes the
873    /// receiver intends to claim — guards against host bugs that shuffle snapshots
874    /// between groups. Refuses mismatched OpenMLS storage versions outright; no
875    /// silent forward/back compatibility.
876    pub async fn import_state_snapshot(
877        self: &Arc<Self>,
878        snapshot_bytes: &[u8],
879        now_ms: u64,
880    ) -> Result<ConversationId> {
881        use crate::device::GroupStateSnapshot;
882        let snap = GroupStateSnapshot::decode(snapshot_bytes)
883            .map_err(|e| Error::Invalid(format!("snapshot decode: {e}")))?;
884
885        if snap.openmls_storage_version != openmls_traits::storage::CURRENT_VERSION {
886            return Err(Error::Invalid(format!(
887                "snapshot openmls_storage_version={} not supported (this SDK supports v={})",
888                snap.openmls_storage_version,
889                openmls_traits::storage::CURRENT_VERSION
890            )));
891        }
892
893        let conv_id = snap.group_id;
894
895        // Refuse if we already have an active handle for this conv — the host should
896        // close it first, otherwise import silently overwrites in-memory state and
897        // the existing handle becomes stale.
898        if self.conversations.read().contains_key(&conv_id) {
899            return Err(Error::Invalid(format!(
900                "conversation {} already open; close before importing snapshot",
901                conv_id.as_hex()
902            )));
903        }
904
905        // Replay raw KV pairs into the provider's working set.
906        let entries: Vec<(Vec<u8>, Vec<u8>)> =
907            snap.entries.into_iter().map(|e| (e.key, e.value)).collect();
908        self.crypto
909            .import_entries(entries)
910            .map_err(|e| Error::Storage(format!("import entries: {e}")))?;
911
912        // Reconstruct the Conversation handle. `Conversation::load` will return
913        // `Ok(None)` if OpenMLS still can't find the group — i.e. our snapshot was
914        // incomplete or for a different storage version.
915        let meta = ConversationMeta {
916            id: conv_id,
917            name: None,
918            epoch: 0, // will be overwritten from the loaded group state in process()
919            member_count: 0,
920            is_device_group: false, // host can flip this via meta update if needed
921            created_at_ms: now_ms,
922        };
923        let convo = Conversation::load(
924            conv_id,
925            meta,
926            SyncCursor::default(),
927            std::collections::BTreeMap::new(),
928            self.local_device.device_id.clone(),
929            self.crypto.clone(),
930            self.signing.clone(),
931            self.storage.clone(),
932            now_ms,
933        )?
934        .ok_or_else(|| {
935            Error::Invalid(
936                "snapshot imported but OpenMLS could not load the group — snapshot may be incomplete or storage version mismatched"
937                    .into(),
938            )
939        })?;
940
941        // Pull the live epoch + member count from the loaded group so the meta we
942        // just stubbed is consistent with what we'll observe on subsequent process_envelope.
943        let live_epoch = convo.epoch();
944        let live_members = convo.group.members().count() as u32;
945        let mut convo = convo;
946        convo.meta.epoch = live_epoch;
947        convo.meta.member_count = live_members;
948        convo.snapshot_to_storage().await?;
949
950        self.conversations.write().insert(conv_id, convo);
951        Ok(conv_id)
952    }
953
954    /// Export a derived secret from one conversation's MLS exporter ([CR-8]).
955    ///
956    /// Thin pass-through to [`Conversation::export_secret`]. See that method's doc comment
957    /// for the contract on `label`, `context`, length validation, and zeroization. The
958    /// returned `Zeroizing<Vec<u8>>` is automatically wiped when dropped.
959    pub fn export_conversation_secret(
960        &self,
961        conv_id: ConversationId,
962        label: &str,
963        context: &[u8],
964        length: usize,
965    ) -> Result<Zeroizing<Vec<u8>>> {
966        let guard = self.conversations.read();
967        let convo = guard
968            .get(&conv_id)
969            .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
970        convo.export_secret(label, context, length)
971    }
972
973    /// Revoke a device by removing its leaf from every conversation where we know its
974    /// position ([CR-2]).
975    ///
976    /// Returns one Commit envelope per conversation the device was a leaf in. The host
977    /// broadcasts each envelope to the affected conversation; the SDK has also already
978    /// handed them to the transport via `transport.send` (idempotent broadcast is the
979    /// host's call).
980    ///
981    /// **Scope.** The SDK can only resolve leaves it recorded itself — either when it
982    /// admitted the device via [`Self::add_members`] or when this device joined as the
983    /// target via Welcome. For peer-admitted devices the leaf index isn't locally known;
984    /// those conversations are silently skipped. The host can fall back to
985    /// `remove_members(leaf_index)` directly using a transport-side directory lookup if
986    /// it needs to revoke from those conversations too. See
987    /// `docs/architecture/multi-device.md §Device removal` for the broader flow.
988    ///
989    /// Conversations with no entry for `device_id` produce no envelope; an empty `Vec`
990    /// return is a valid outcome (e.g. the device was already revoked, or was never
991    /// added by this client).
992    #[allow(clippy::await_holding_lock)] // see add_members for rationale
993    pub async fn revoke_device(
994        &self,
995        device_id: DeviceId,
996        now_ms: u64,
997    ) -> Result<Vec<MessageEnvelope>> {
998        // 1. Walk every open conversation and gather (conv_id, leaf_index) pairs where
999        //    we know `device_id` controls a leaf. Done under a read lock so we don't hold
1000        //    the write lock across the per-conversation remove path.
1001        let targets: Vec<(ConversationId, u32)> = self
1002            .conversations
1003            .read()
1004            .iter()
1005            .filter_map(|(id, c)| c.leaf_index_of(&device_id).map(|leaf| (*id, leaf)))
1006            .collect();
1007
1008        // 2. For each target, emit a remove_members commit. We do this sequentially: each
1009        //    one is a separate MLS epoch advance on its own group, and they don't share
1010        //    state, so parallel issuance is safe but adds complexity we don't need for v1.
1011        let mut envelopes = Vec::with_capacity(targets.len());
1012        for (conv_id, leaf_index) in targets {
1013            let envelope = {
1014                let mut guard = self.conversations.write();
1015                let convo = guard
1016                    .get_mut(&conv_id)
1017                    .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
1018                convo.remove_members(vec![leaf_index], now_ms)?
1019            };
1020            self.transport.send(envelope.clone()).await?;
1021            if let Some(c) = self.conversations.read().get(&conv_id) {
1022                c.snapshot_to_storage().await?;
1023            }
1024            envelopes.push(envelope);
1025        }
1026
1027        // 3. Notify the auth-layer server so it can invalidate the
1028        //    revoked device's KeyPackage pool, mark `auth.devices.revoked_at`,
1029        //    and refuse any future envelope signed by the revoked device's
1030        //    JWT. Done AFTER the MLS Commits so peers learn via MLS first
1031        //    (the canonical path) and the auth layer is the eventual-
1032        //    consistency cleanup. Transport failures bubble up so callers
1033        //    can retry — but the MLS-side work has already shipped, so
1034        //    the device is functionally revoked in every group; only the
1035        //    auth-layer KeyPackage purge is pending.
1036        self.transport.revoke_device_remote(device_id).await?;
1037        Ok(envelopes)
1038    }
1039}
1040
1041fn device_group_id_for(user_id: &UserId) -> ConversationId {
1042    // Deterministic 16-byte ID derived from the user's id, prefixed so it cannot collide with
1043    // a randomly-generated ULID in normal use (ULIDs start with a millisecond timestamp).
1044    let mut bytes = [0u8; 16];
1045    bytes[0] = 0xFF;
1046    bytes[1] = 0xDC; // "DeviCe" group sentinel
1047    let h = codec::sha256(&user_id.0);
1048    bytes[2..].copy_from_slice(&h[..14]);
1049    ConversationId(bytes)
1050}
1051
1052fn encode_local_device(d: &LocalDevice) -> Result<Vec<u8>> {
1053    use serde::Serialize;
1054    #[derive(Serialize)]
1055    struct Persisted<'a> {
1056        device_id: &'a DeviceId,
1057        label: &'a str,
1058        created_at_ms: u64,
1059        #[serde(with = "serde_bytes")]
1060        signing_seed: &'a [u8],
1061    }
1062    codec::encode(&Persisted {
1063        device_id: &d.device_id,
1064        label: &d.label,
1065        created_at_ms: d.created_at_ms,
1066        signing_seed: d.signing.as_bytes(),
1067    })
1068}
1069
1070fn decode_local_device(bytes: &[u8], user_id: UserId) -> Result<LocalDevice> {
1071    use serde::Deserialize;
1072    #[derive(Deserialize)]
1073    struct Persisted {
1074        device_id: DeviceId,
1075        label: String,
1076        created_at_ms: u64,
1077        #[serde(with = "serde_bytes")]
1078        signing_seed: Vec<u8>,
1079    }
1080    let p: Persisted = codec::decode(bytes)?;
1081    let seed: [u8; 32] = p
1082        .signing_seed
1083        .as_slice()
1084        .try_into()
1085        .map_err(|_| Error::Invalid("device signing seed must be 32 bytes".into()))?;
1086    let signing = ed25519_dalek::SigningKey::from_bytes(&seed);
1087    Ok(LocalDevice {
1088        device_id: p.device_id,
1089        user_id,
1090        label: p.label,
1091        signing,
1092        created_at_ms: p.created_at_ms,
1093    })
1094}