Skip to main content

ping_core/
client.rs

1//! `MessagingClient` — top-level handle. Owns the OpenMLS provider, identity, local device,
2//! and the set of open conversations.
3//!
4//! All operations are `async`. The intent is that the FFI generators emit Swift `async`,
5//! Kotlin `suspend`, and the WASM glue exposes Promises.
6
7use openmls::framing::MlsMessageOut;
8use openmls::prelude::{
9    tls_codec::Serialize as TlsSerialize, BasicCredential, Ciphersuite, CredentialWithKey,
10    KeyPackageBuilder,
11};
12use openmls_basic_credential::SignatureKeyPair;
13use openmls_traits::OpenMlsProvider;
14use parking_lot::RwLock;
15use ping_mls_store::{PersistentMlsProvider, StorageBackend};
16use std::collections::HashMap;
17use std::sync::Arc;
18use zeroize::Zeroizing;
19
20use crate::{
21    codec,
22    conversation::{Conversation, ConversationId, ConversationMeta},
23    device::{
24        CatchupAppEventEntry, CatchupConversationEntry, CatchupSnapshot, DeviceId, DeviceInfo,
25        LinkingTicket, LocalDevice, CATCHUP_SNAPSHOT_VERSION,
26    },
27    error::{Error, Result},
28    identity::{Identity, UserId},
29    message::{IncomingMessage, MessageEnvelope, MessageKind},
30    storage::Storage,
31    sync::SyncCursor,
32    transport::Transport,
33};
34
/// Ciphersuite this module hands to OpenMLS. Its signature algorithm is used when the
/// MLS signing keypair is built in `MessagingClient::init`, and the suite itself is
/// passed to `KeyPackageBuilder` in `MessagingClient::fresh_key_package`.
const DEFAULT_CIPHERSUITE: Ciphersuite = Ciphersuite::MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519;
36
/// Per-chat result reported by [`MessagingClient::admit_device_to_chats`].
///
/// One entry is produced for every `(conversation, key package)` pair the caller
/// supplied, whether the admission succeeded, was skipped, or failed.
#[derive(Debug, Clone)]
pub struct AdmitChatOutcome {
    /// Conversation this entry reports on.
    pub conversation_id: ConversationId,
    /// What happened when admitting the device to that conversation.
    pub status: AdmitChatStatus,
}
43
/// Outcome of trying to admit a device to a single chat.
///
/// Implements `PartialEq`/`Eq` so hosts and tests can compare statuses directly
/// (e.g. `status == AdmitChatStatus::Admitted`) instead of pattern-matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AdmitChatStatus {
    /// The new device is now an MLS leaf in this chat. Both the Commit
    /// and the addressed Welcome have been sent.
    Admitted,
    /// We chose not to admit (e.g. the conversation is a DeviceGroup,
    /// which was already handled at linking-ticket build time).
    Skipped { reason: String },
    /// MLS or transport rejected the admission. `error` is the underlying
    /// message — typically a `transport error: ...` or an OpenMLS error.
    Failed { error: String },
}
56
57#[derive(Debug)]
58pub struct ClientConfig {
59    pub identity: Identity,
60    pub device_label: String,
61    pub storage: Arc<dyn Storage>,
62    pub transport: Arc<dyn Transport>,
63    /// Wall clock in ms. Pulled from the host so we can use a synthetic clock in tests.
64    pub now_ms: u64,
65    /// [CR-4] OpenMLS-provider backend. Defaults to in-memory; iOS NSE and web SW
66    /// cold-start paths MUST pass `StorageBackend::Sqlite { path, encryption_key }`
67    /// (native) or `StorageBackend::IndexedDb { db_name }` (WASM, when that lands).
68    /// See `docs/design/CR4_CR7_PERSISTENCE.md`.
69    pub storage_backend: StorageBackend,
70    /// Optional 32-byte Ed25519 secret key the SDK should use as the
71    /// device signing key. When set AND no `LocalDevice` is yet
72    /// persisted in `storage`, the SDK constructs its first
73    /// `LocalDevice` from this key instead of generating a fresh
74    /// random one — so `device_id = SHA-256(public_key_of(secret))`
75    /// is fully determined by what the host provided.
76    ///
77    /// Use case: align the SDK's `device_id` (which it stamps into
78    /// every envelope's `sender_device` field) with an externally-
79    /// computed device id — typically `SHA-256(device_signing_pubkey)`
80    /// in the host's auth layer, where the JWT carries that same
81    /// value as its `device_id` claim. Without this alignment, a
82    /// server that validates `envelope.sender_device ==
83    /// jwt.device_id` would reject every send.
84    ///
85    /// Ignored on re-init (when storage already has a persisted
86    /// `LocalDevice`) so the device identity remains stable across
87    /// restarts.
88    pub device_signing_secret_key: Option<[u8; 32]>,
89}
90
91impl ClientConfig {
92    /// Construct a config with `StorageBackend::Memory` — convenient for tests and
93    /// the existing v0.1 in-memory flow.
94    pub fn new_in_memory(
95        identity: Identity,
96        device_label: String,
97        storage: Arc<dyn Storage>,
98        transport: Arc<dyn Transport>,
99        now_ms: u64,
100    ) -> Self {
101        Self {
102            identity,
103            device_label,
104            storage,
105            transport,
106            now_ms,
107            storage_backend: StorageBackend::Memory,
108            device_signing_secret_key: None,
109        }
110    }
111}
112
/// Top-level client handle returned by [`MessagingClient::init`].
pub struct MessagingClient {
    /// Identity of the user this client acts for.
    pub(crate) identity: Identity,
    /// The local device record (loaded from storage or created on first init).
    pub(crate) local_device: LocalDevice,
    /// OpenMLS provider opened from the configured storage backend.
    pub(crate) crypto: Arc<PersistentMlsProvider>,
    /// MLS signature keypair, built in `init` from the local device's signing key.
    pub(crate) signing: Arc<SignatureKeyPair>,
    /// Host key/value storage for SDK-side records.
    pub(crate) storage: Arc<dyn Storage>,
    /// Transport used to send and fetch envelopes.
    pub(crate) transport: Arc<dyn Transport>,
    /// Currently open conversations, keyed by conversation id.
    conversations: RwLock<HashMap<ConversationId, Conversation>>,
}
122
123impl std::fmt::Debug for MessagingClient {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        f.debug_struct("MessagingClient")
126            .field("user_id", &self.identity.user_id().as_hex())
127            .field("device_id", &self.local_device.device_id.as_hex())
128            .field("conversation_count", &self.conversations.read().len())
129            .finish()
130    }
131}
132
133impl MessagingClient {
134    /// Initialise. Creates a new local device if none is recorded in storage; otherwise rehydrates.
135    pub async fn init(cfg: ClientConfig) -> Result<Arc<Self>> {
136        // [CR-4] OpenMLS provider is now pluggable. For `StorageBackend::Memory` this
137        // behaves like the old `OpenMlsRustCrypto::default()`. For `Sqlite`, the
138        // working set is hydrated from the on-disk blob; subsequent `checkpoint` calls
139        // flush it back. iOS NSE / web SW cold-start lives here.
140        //
141        // Use `open_async` so the WASM `StorageBackend::IndexedDb` variant can read
142        // its snapshot blob through the host-supplied `AsyncBlobStore` before
143        // returning — without this, the provider's `MemoryStorage` would be empty
144        // and `MlsGroup::load` would silently return `None` for every group on
145        // cold restart, breaking chat persistence across reloads. Native targets
146        // (Memory + Sqlite) delegate to the sync path under the hood, so the
147        // `.await` is free there.
148        let crypto = PersistentMlsProvider::open_async(cfg.storage_backend.clone())
149            .await
150            .map_err(|e| Error::Storage(format!("provider open: {e}")))?;
151        let local_device = match cfg.storage.get("device", "local").await? {
152            Some(bytes) => decode_local_device(&bytes, cfg.identity.user_id().clone())?,
153            None => {
154                // First-init path. If the host supplied a signing secret
155                // (typically to align the device_id with their auth
156                // layer), use it; otherwise mint a fresh random key.
157                // Either way, the constructed `LocalDevice` is
158                // immediately persisted so future inits load from
159                // storage without consulting the override again.
160                let dev = match cfg.device_signing_secret_key.as_ref() {
161                    Some(secret) => LocalDevice::from_signing_secret(
162                        cfg.identity.user_id().clone(),
163                        cfg.device_label,
164                        cfg.now_ms,
165                        secret,
166                    ),
167                    None => LocalDevice::generate(
168                        cfg.identity.user_id().clone(),
169                        cfg.device_label,
170                        cfg.now_ms,
171                    ),
172                };
173                let bytes = encode_local_device(&dev)?;
174                cfg.storage.put("device", "local", bytes).await?;
175                dev
176            }
177        };
178
179        // [CR-4] MLS signing keypair MUST be stable across cold restarts — otherwise the
180        // leaf-key stored on disk no longer matches the per-client key on re-init, and any
181        // send-after-restart silently misroutes. We derive deterministically from the
182        // already-persistent `LocalDevice::signing` (Ed25519, 32 raw bytes), and the
183        // ciphersuite's signature scheme is Ed25519 too — so the device signing key and the
184        // MLS leaf signing key are the same bytes. The MLS storage provider also receives
185        // a copy via `store()` so OpenMLS-internal lookups (process_message, etc.) succeed.
186        let signing = {
187            let sk_bytes = local_device.signing.to_bytes().to_vec();
188            let pk_bytes = local_device.signing.verifying_key().to_bytes().to_vec();
189            let kp = SignatureKeyPair::from_raw(
190                DEFAULT_CIPHERSUITE.signature_algorithm(),
191                sk_bytes,
192                pk_bytes,
193            );
194            kp.store(crypto.storage()).map_err(Error::mls)?;
195            Arc::new(kp)
196        };
197
198        let client = Arc::new(Self {
199            identity: cfg.identity,
200            local_device,
201            crypto,
202            signing,
203            storage: cfg.storage,
204            transport: cfg.transport,
205            conversations: RwLock::new(HashMap::new()),
206        });
207
208        client.rehydrate_conversations(cfg.now_ms).await?;
209
210        // [CR-10] Ensure the DeviceGroup exists at init, not lazily inside
211        // build_linking_ticket. Single-device users need somewhere to write
212        // personal events (drafts, read pointers, notes, vault wrapper)
213        // even before they pair a second device. Lazy creation in
214        // build_linking_ticket left them with no DG → no place for
215        // personal state to land.
216        //
217        // Idempotent — re-init after a cold restart finds the DG via
218        // rehydrate_conversations and this becomes a no-op.
219        client.ensure_device_group(cfg.now_ms).await?;
220
221        Ok(client)
222    }
223
224    /// [CR-10] Idempotently ensures this user's DeviceGroup exists in
225    /// `self.conversations`. Called from `init` (so single-device users
226    /// have a DG immediately) and from `build_linking_ticket` (the legacy
227    /// lazy path; still safe to call when the DG already exists, since
228    /// rehydrate_conversations would have re-attached it before init
229    /// returned).
230    ///
231    /// The DeviceGroup is a one-leaf MLS group at creation time —
232    /// `add_members` (called by `build_linking_ticket` when a second
233    /// device pairs in) is what grows it. We persist the snapshot so a
234    /// cold restart picks it up before this function runs again.
235    pub(crate) async fn ensure_device_group(self: &Arc<Self>, now_ms: u64) -> Result<()> {
236        let dg_id = device_group_id_for(self.identity.user_id());
237        if self.conversations.read().contains_key(&dg_id) {
238            return Ok(());
239        }
240        let mut new_dg = Conversation::create(
241            dg_id,
242            Some("device-group".into()),
243            self.local_device.device_id.clone(),
244            self.identity.user_id(),
245            self.crypto.clone(),
246            self.signing.clone(),
247            self.storage.clone(),
248            now_ms,
249        )?;
250        new_dg.meta.is_device_group = true;
251        new_dg.snapshot_to_storage().await?;
252        self.conversations.write().insert(dg_id, new_dg);
253        Ok(())
254    }
255
256    pub fn user_id(&self) -> UserId {
257        self.identity.user_id().clone()
258    }
259    pub fn device_id(&self) -> DeviceId {
260        self.local_device.device_id.clone()
261    }
262    pub fn device_info(&self, now_ms: u64) -> DeviceInfo {
263        self.local_device.info(now_ms)
264    }
265
266    /// Generate a fresh KeyPackage to publish to the directory. Hosts call this when registering
267    /// a device or topping up the directory.
268    pub fn fresh_key_package(&self) -> Result<Vec<u8>> {
269        let credential_with_key = CredentialWithKey {
270            credential: BasicCredential::new(self.identity.user_id().0.clone()).into(),
271            signature_key: self.signing.public().to_vec().into(),
272        };
273        let bundle = KeyPackageBuilder::new()
274            .build(
275                DEFAULT_CIPHERSUITE,
276                self.crypto.as_ref(),
277                self.signing.as_ref(),
278                credential_with_key,
279            )
280            .map_err(Error::mls)?;
281        // KeyPackages are serialized as MlsMessage(KeyPackage) per the MLS framing spec.
282        let msg: MlsMessageOut = bundle.key_package().clone().into();
283        msg.tls_serialize_detached().map_err(Error::mls)
284    }
285
286    /// Create a new conversation owned by this client (and seeded with a single member: this device).
287    pub async fn create_conversation(
288        self: &Arc<Self>,
289        name: Option<String>,
290        now_ms: u64,
291    ) -> Result<ConversationId> {
292        let id = ConversationId::new();
293        let convo = Conversation::create(
294            id,
295            name,
296            self.local_device.device_id.clone(),
297            self.identity.user_id(),
298            self.crypto.clone(),
299            self.signing.clone(),
300            self.storage.clone(),
301            now_ms,
302        )?;
303        convo.snapshot_to_storage().await?;
304        self.conversations.write().insert(id, convo);
305        Ok(id)
306    }
307
308    /// Join via a Welcome bundled in a [`MessageEnvelope`] of kind `Welcome`.
309    pub async fn join_conversation(
310        self: &Arc<Self>,
311        welcome_envelope: &MessageEnvelope,
312        now_ms: u64,
313    ) -> Result<ConversationId> {
314        if welcome_envelope.kind != MessageKind::Welcome {
315            return Err(Error::Invalid("expected Welcome envelope".into()));
316        }
317        let convo = Conversation::join(
318            &welcome_envelope.payload,
319            self.local_device.device_id.clone(),
320            self.crypto.clone(),
321            self.signing.clone(),
322            self.storage.clone(),
323            now_ms,
324        )?;
325        let id = convo.id();
326        convo.snapshot_to_storage().await?;
327        self.conversations.write().insert(id, convo);
328        Ok(id)
329    }
330
331    pub fn list_conversations(&self) -> Vec<ConversationMeta> {
332        self.conversations
333            .read()
334            .values()
335            .map(|c| c.meta.clone())
336            .collect()
337    }
338
339    /// Send an application message. Returns once the envelope has been handed to the transport.
340    #[allow(clippy::await_holding_lock)] // see add_members for rationale
341    pub async fn send(
342        &self,
343        conv_id: ConversationId,
344        plaintext: Vec<u8>,
345        now_ms: u64,
346    ) -> Result<MessageEnvelope> {
347        let envelope = {
348            let mut guard = self.conversations.write();
349            let convo = guard
350                .get_mut(&conv_id)
351                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
352            convo.send_application(&plaintext, now_ms)?
353        };
354        self.transport.send(envelope.clone()).await?;
355        // The OpenMLS sender ratchet advances on every Application message — `seq` + `hlc`
356        // are bumped on the conversation, and the underlying group keystore stores new
357        // generation keys. Without a checkpoint here, a reload rolls back to the pre-send
358        // state and the next send re-uses an already-consumed generation that receivers
359        // silently drop. Mirrors the snapshot calls after every Commit/Welcome op.
360        if let Some(c) = self.conversations.read().get(&conv_id) {
361            c.snapshot_to_storage().await?;
362        }
363        Ok(envelope)
364    }
365
366    /// Add members. The Commit goes on the wire; the Welcome should be delivered to the new
367    /// devices' inboxes (the host transport implements that — typically as a separate addressed
368    /// envelope).
369    ///
370    /// [CR-2] Each entry is `(DeviceId, KeyPackage_bytes)`. The host typically gets the
371    /// device_id from the directory at the same time it gets the KeyPackage; we use it to
372    /// record a per-conversation `device_id → leaf_index` map so [`Self::revoke_device`]
373    /// can later locate the leaf without a fresh directory lookup. The SDK does not
374    /// cryptographically verify the host's device-id claim — that's a directory policy
375    /// concern.
376    //
377    // We hold a `parking_lot` read guard across `.await` for `snapshot_to_storage` here. Clippy
378    // flags this; we keep it for v0.1 because the alternative is a structural refactor of
379    // Conversation::snapshot_to_storage to split sync prep from async writes — see
380    // docs/ASSUMPTIONS.md item "lock-during-async-I/O is suboptimal but acceptable for v0.1".
381    // The `parking_lot/send_guard` feature (in core/Cargo.toml) makes the guard `Send` so the
382    // future is still schedulable across tokio threads.
383    #[allow(clippy::await_holding_lock)]
384    pub async fn add_members(
385        &self,
386        conv_id: ConversationId,
387        entries: Vec<(DeviceId, Vec<u8>)>,
388        now_ms: u64,
389    ) -> Result<()> {
390        let outcome = {
391            let mut guard = self.conversations.write();
392            let convo = guard
393                .get_mut(&conv_id)
394                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
395            convo.add_members(entries, now_ms)?
396        };
397        self.transport.send(outcome.commit).await?;
398        self.transport.send(outcome.welcome).await?;
399        if let Some(c) = self.conversations.read().get(&conv_id) {
400            c.snapshot_to_storage().await?;
401        }
402        Ok(())
403    }
404
405    /// Admits `new_device_id` to every conversation in `kps_per_chat` via
406    /// the standard MLS `add_members` flow — one Commit + one Welcome per
407    /// chat. This is the SDK-side replacement for the host's previous
408    /// per-chat reconciler loop after device linking; centralising it
409    /// here means iOS/Android/web hosts all share the orchestration and
410    /// the transport's Welcome-recipient priming is automatic.
411    ///
412    /// Inputs:
413    /// - `new_device_id`: the device being admitted (matches the
414    ///   `device_binding_sig` recipient in the linking ticket).
415    /// - `kps_per_chat`: one freshly-claimed KeyPackage per chat. The
416    ///   host claims these via the auth-layer's per-account KP pool
417    ///   (`GET /v1/devices/{accountId}`) AFTER the new device's
418    ///   bootstrap has uploaded its KP batch.
419    /// - `now_ms`: wall-clock used to stamp HLCs on the emitted
420    ///   envelopes.
421    ///
422    /// Per-chat failures (unknown conversation, MLS error, transport
423    /// error, etc.) are CAPTURED in the returned vec rather than
424    /// short-circuiting the whole call — losing one chat shouldn't
425    /// strand the new device on every other chat. The caller decides
426    /// whether to retry the failed entries (e.g. with a fresh KP).
427    #[allow(clippy::await_holding_lock)] // see add_members for rationale
428    pub async fn admit_device_to_chats(
429        &self,
430        new_device_id: DeviceId,
431        kps_per_chat: Vec<(ConversationId, Vec<u8>)>,
432        now_ms: u64,
433    ) -> Result<Vec<AdmitChatOutcome>> {
434        let mut outcomes = Vec::with_capacity(kps_per_chat.len());
435        for (conv_id, kp_bytes) in kps_per_chat {
436            // Belt-and-braces: skip the DeviceGroup. The DG was already
437            // welcomed via the linking ticket — re-adding the new
438            // device there would produce a duplicate-add Commit that
439            // BE de-dups, but the noise is avoidable.
440            let is_dg = self
441                .conversations
442                .read()
443                .get(&conv_id)
444                .map(|c| c.meta().is_device_group)
445                .unwrap_or(false);
446            if is_dg {
447                outcomes.push(AdmitChatOutcome {
448                    conversation_id: conv_id,
449                    status: AdmitChatStatus::Skipped {
450                        reason: "device_group".to_string(),
451                    },
452                });
453                continue;
454            }
455
456            // Prime the host transport with the welcome recipient BEFORE
457            // we mutate MLS state. If priming fails (non-web hosts use
458            // the default no-op), continue — the host's transport will
459            // either route some other way or surface a 4xx on the
460            // welcome send and we'll catch it below.
461            let _ = self
462                .transport
463                .set_next_welcome_recipients(conv_id, vec![new_device_id.clone()])
464                .await;
465
466            let entry = (new_device_id.clone(), kp_bytes);
467            let outcome_result = {
468                let mut guard = self.conversations.write();
469                match guard.get_mut(&conv_id) {
470                    Some(convo) => convo.add_members(vec![entry], now_ms),
471                    None => Err(Error::UnknownConversation(conv_id.as_hex())),
472                }
473            };
474
475            let outcome = match outcome_result {
476                Ok(o) => o,
477                Err(e) => {
478                    outcomes.push(AdmitChatOutcome {
479                        conversation_id: conv_id,
480                        status: AdmitChatStatus::Failed {
481                            error: e.to_string(),
482                        },
483                    });
484                    continue;
485                }
486            };
487
488            if let Err(e) = self.transport.send(outcome.commit).await {
489                outcomes.push(AdmitChatOutcome {
490                    conversation_id: conv_id,
491                    status: AdmitChatStatus::Failed {
492                        error: format!("commit send: {e}"),
493                    },
494                });
495                continue;
496            }
497            if let Err(e) = self.transport.send(outcome.welcome).await {
498                outcomes.push(AdmitChatOutcome {
499                    conversation_id: conv_id,
500                    status: AdmitChatStatus::Failed {
501                        error: format!("welcome send: {e}"),
502                    },
503                });
504                continue;
505            }
506
507            if let Some(c) = self.conversations.read().get(&conv_id) {
508                if let Err(e) = c.snapshot_to_storage().await {
509                    // Snapshot failure is non-fatal for the join — the
510                    // MLS adds already shipped — but record it so the
511                    // host can decide whether to retry. The next
512                    // successful send/process will re-snapshot anyway.
513                    outcomes.push(AdmitChatOutcome {
514                        conversation_id: conv_id,
515                        status: AdmitChatStatus::Failed {
516                            error: format!("snapshot: {e}"),
517                        },
518                    });
519                    continue;
520                }
521            }
522
523            outcomes.push(AdmitChatOutcome {
524                conversation_id: conv_id,
525                status: AdmitChatStatus::Admitted,
526            });
527        }
528        Ok(outcomes)
529    }
530
531    #[allow(clippy::await_holding_lock)] // see add_members for rationale
532    pub async fn remove_members(
533        &self,
534        conv_id: ConversationId,
535        leaf_indexes: Vec<u32>,
536        now_ms: u64,
537    ) -> Result<()> {
538        let envelope = {
539            let mut guard = self.conversations.write();
540            let convo = guard
541                .get_mut(&conv_id)
542                .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
543            convo.remove_members(leaf_indexes, now_ms)?
544        };
545        self.transport.send(envelope).await?;
546        if let Some(c) = self.conversations.read().get(&conv_id) {
547            c.snapshot_to_storage().await?;
548        }
549        Ok(())
550    }
551
552    /// Process an inbound envelope coming from the transport's subscribe callback or a sync pull.
553    /// Returns `Some` for application traffic, `None` for handshake messages (already merged).
554    #[allow(clippy::await_holding_lock)] // see add_members for rationale
555    pub async fn process_envelope(
556        &self,
557        env: &MessageEnvelope,
558        now_ms: u64,
559    ) -> Result<Option<IncomingMessage>> {
560        // Welcome envelopes for unknown conversations are routed to `join_conversation` by the
561        // caller. Here we only handle traffic for already-open groups.
562        let mut guard = self.conversations.write();
563        let convo = match guard.get_mut(&env.conversation_id) {
564            Some(c) => c,
565            None => return Err(Error::UnknownConversation(env.conversation_id.as_hex())),
566        };
567        let out = convo.process(env, now_ms)?;
568        // Cheap snapshot — only mutates KV the size of the cursor.
569        convo.snapshot_to_storage().await?;
570        Ok(out)
571    }
572
573    /// Catch-up sync: pull missing events for every open conversation since its cursor.
574    /// Returns the list of newly-decrypted application messages, in apply order.
575    pub async fn sync_conversations(&self, now_ms: u64) -> Result<Vec<IncomingMessage>> {
576        let pending: Vec<(ConversationId, SyncCursor)> = self
577            .conversations
578            .read()
579            .iter()
580            .map(|(id, c)| (*id, c.cursor.clone()))
581            .collect();
582
583        let mut delivered = Vec::new();
584        for (conv_id, cursor) in pending {
585            loop {
586                let batch = self
587                    .transport
588                    .fetch_since(conv_id, cursor.clone(), 256)
589                    .await?;
590                if batch.is_empty() {
591                    break;
592                }
593                for env in &batch {
594                    if let Some(msg) = self.process_envelope(env, now_ms).await? {
595                        delivered.push(msg);
596                    }
597                }
598                if batch.len() < 256 {
599                    break;
600                } // partial page → caught up
601            }
602        }
603        Ok(delivered)
604    }
605
606    /// Rehydrate conversations from storage on startup ([CR-4]).
607    ///
608    /// Walks the host-side `groups` namespace for meta records, pairs each with its
609    /// cursor + device→leaf map, and asks `Conversation::load` to re-attach to the
610    /// underlying OpenMLS group state. The MLS state itself was persisted by the
611    /// SQLite-backed `PersistentMlsProvider` on the previous run; this method
612    /// reconciles the SDK-side caches with what's on disk.
613    async fn rehydrate_conversations(self: &Arc<Self>, now_ms: u64) -> Result<()> {
614        let metas = self.storage.list_keys("groups", "").await?;
615        for path in metas {
616            // path looks like "{convId}/meta"
617            let Some((id_hex, suffix)) = path.split_once('/') else {
618                continue;
619            };
620            if suffix != "meta" {
621                continue;
622            }
623            let Some(meta_bytes) = self.storage.get("groups", &path).await? else {
624                continue;
625            };
626            let meta: ConversationMeta = match codec::decode(&meta_bytes) {
627                Ok(m) => m,
628                Err(_) => continue,
629            };
630            let cursor_bytes = self
631                .storage
632                .get("cursors", id_hex)
633                .await?
634                .unwrap_or_default();
635            let cursor = if cursor_bytes.is_empty() {
636                SyncCursor::default()
637            } else {
638                SyncCursor::decode(&cursor_bytes).unwrap_or_default()
639            };
640
641            // [CR-2] device→leaf map was persisted alongside meta + cursor.
642            let device_leaves_bytes = self
643                .storage
644                .get("device_leaves", id_hex)
645                .await?
646                .unwrap_or_default();
647            let device_leaves: std::collections::BTreeMap<DeviceId, u32> =
648                if device_leaves_bytes.is_empty() {
649                    std::collections::BTreeMap::new()
650                } else {
651                    let pairs: Vec<(DeviceId, u32)> =
652                        codec::decode(&device_leaves_bytes).unwrap_or_default();
653                    pairs.into_iter().collect()
654                };
655
656            match Conversation::load(
657                meta.id,
658                meta.clone(),
659                cursor,
660                device_leaves,
661                self.local_device.device_id.clone(),
662                self.crypto.clone(),
663                self.signing.clone(),
664                self.storage.clone(),
665                now_ms,
666            ) {
667                Ok(Some(convo)) => {
668                    tracing::debug!(
669                        target: "ping_core::client",
670                        convo = %id_hex,
671                        epoch = meta.epoch,
672                        "rehydrated conversation from disk"
673                    );
674                    self.conversations.write().insert(meta.id, convo);
675                }
676                Ok(None) => {
677                    tracing::warn!(
678                        target: "ping_core::client",
679                        convo = %id_hex,
680                        "host-side meta present but OpenMLS state missing — skipping"
681                    );
682                }
683                Err(e) => {
684                    tracing::warn!(
685                        target: "ping_core::client",
686                        convo = %id_hex,
687                        error = %e,
688                        "Conversation::load failed — skipping"
689                    );
690                }
691            }
692        }
693        Ok(())
694    }
695
696    // ------------------- Multi-device API -------------------
697
698    /// Build a [`LinkingTicket`] for a new device. The caller obtains `new_device_kp` from the
699    /// new device (e.g., via QR-encoded handshake) and is responsible for sealing the returned
700    /// ticket against the new device's ephemeral X25519 pubkey before transmission via
701    /// [`ping_link::seal_ticket`].
702    ///
703    /// [CR-13] `last_app_events` is a host-supplied list of `(conversation_id, app_event_bytes)`
704    /// for the new device's "what you missed" UI. The SDK adds its own metas + (currently-
705    /// empty) per-conversation MLS state and bundles everything into
706    /// [`device::CatchupSnapshot`], CBOR-encoded into the ticket's `catchup_snapshot` field.
707    /// Pass an empty `Vec` to suppress catchup data (the new device sees an empty
708    /// conversation list until normal sync runs).
709    pub async fn build_linking_ticket(
710        self: &Arc<Self>,
711        new_device_id: DeviceId,
712        new_device_kp: Vec<u8>,
713        last_app_events: Vec<(ConversationId, Vec<u8>)>,
714        now_ms: u64,
715    ) -> Result<LinkingTicket> {
716        let device_binding_sig = self.identity.sign_device_binding(&new_device_id.0);
717        let dg_id = device_group_id_for(self.identity.user_id());
718
719        // [CR-10] DG is eagerly created at init now, but call ensure here too so
720        // hosts that bypass `MessagingClient::init` (mocked tests, legacy upgrade
721        // paths) keep working.
722        self.ensure_device_group(now_ms).await?;
723
724        // Admit the new device to the DeviceGroup.
725        let outcome = {
726            let mut conversations = self.conversations.write();
727            let dg = conversations
728                .get_mut(&dg_id)
729                .expect("DeviceGroup ensured above");
730            // [CR-2] Record the new device's leaf in the DG so future `revoke_device`
731            // can find it. The new_device_id we got as a parameter is the inviter's
732            // own assertion — same trust model as the rest of `add_members`.
733            dg.add_members(vec![(new_device_id.clone(), new_device_kp)], now_ms)?
734        };
735
736        // [CR-13] Assemble the catchup snapshot: SDK-known conversation metadata + host-
737        // supplied last-known plaintext per conversation. [CR-7] now populates
738        // `group_state_bytes` with each group's MLS state so the new device can decrypt
739        // historical traffic without re-Welcoming. An empty `group_state_bytes` would
740        // mean either a group with no exportable state (shouldn't happen) or an
741        // encoder failure (we let those propagate as errors below).
742        let catchup_snapshot = if last_app_events.is_empty() && self.conversations.read().is_empty()
743        {
744            // Cheap path: nothing to snapshot, skip the encode round-trip.
745            Vec::new()
746        } else {
747            let conversation_metas: Vec<CatchupConversationEntry> = self
748                .conversations
749                .read()
750                .values()
751                .map(|c| -> Result<CatchupConversationEntry> {
752                    // CR-7: per-group state. We deliberately keep the export bytes
753                    // inside the (HPKE-sealed-by-CR-3) LinkingTicket; the receiver
754                    // calls `import_state_snapshot` with these bytes after `consume_linking_ticket`.
755                    let group_bytes = c.export_state_snapshot(now_ms)?.to_vec();
756                    Ok(CatchupConversationEntry {
757                        conversation_id: c.id(),
758                        meta: c.meta().clone(),
759                        group_state_bytes: group_bytes,
760                    })
761                })
762                .collect::<Result<_>>()?;
763            let last_app_events_per_conv: Vec<CatchupAppEventEntry> = last_app_events
764                .into_iter()
765                .map(|(conversation_id, app_event_bytes)| CatchupAppEventEntry {
766                    conversation_id,
767                    app_event_bytes,
768                })
769                .collect();
770            CatchupSnapshot {
771                v: CATCHUP_SNAPSHOT_VERSION,
772                conversation_metas,
773                last_app_events_per_conv,
774            }
775            .encode()?
776        };
777
778        Ok(LinkingTicket {
779            v: 1,
780            user_id: self.identity.user_id().clone(),
781            user_pubkey: self.identity.public_key().to_bytes().to_vec(),
782            new_device_id,
783            device_binding_sig,
784            device_group_welcome: outcome.welcome.payload,
785            catchup_snapshot,
786        })
787    }
788
789    /// Apply a received linking ticket. Joins the user's DeviceGroup; the catch-up snapshot
790    /// (if any) is decrypted by the host using the standard per-conversation channel afterwards.
791    pub async fn consume_linking_ticket(
792        self: &Arc<Self>,
793        ticket: &LinkingTicket,
794        now_ms: u64,
795    ) -> Result<()> {
796        // Verify the binding the existing device made for us. (Ed25519 public keys are 32 bytes.)
797        let pk_bytes: [u8; 32] = ticket
798            .user_pubkey
799            .as_slice()
800            .try_into()
801            .map_err(|_| Error::Identity("user_pubkey must be 32 bytes".into()))?;
802        let user_pk = ed25519_dalek::VerifyingKey::from_bytes(&pk_bytes)
803            .map_err(|e| Error::Identity(format!("bad user pubkey: {e}")))?;
804        Identity::verify_device_binding(
805            &user_pk,
806            &ticket.user_id,
807            &ticket.new_device_id.0,
808            &ticket.device_binding_sig,
809        )?;
810        if ticket.new_device_id != self.local_device.device_id {
811            return Err(Error::Invalid(
812                "ticket addressed to a different device".into(),
813            ));
814        }
815
816        let dummy_env = MessageEnvelope::new(
817            ConversationId(device_group_id_for(&ticket.user_id).0),
818            0,
819            MessageKind::Welcome,
820            self.local_device.device_id.clone(),
821            0,
822            crate::clock::Hlc::ZERO,
823            ticket.device_group_welcome.clone(),
824        );
825        self.join_conversation(&dummy_env, now_ms).await?;
826        Ok(())
827    }
828
829    /// [CR-7] Export the MLS state snapshot for one open conversation.
830    ///
831    /// Thin pass-through to [`Conversation::export_state_snapshot`]. Returned bytes
832    /// are wrapped in `Zeroizing` because they contain past epoch secrets.
833    pub fn export_conversation_state_snapshot(
834        &self,
835        conv_id: ConversationId,
836        now_ms: u64,
837    ) -> Result<zeroize::Zeroizing<Vec<u8>>> {
838        let guard = self.conversations.read();
839        let convo = guard
840            .get(&conv_id)
841            .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
842        convo.export_state_snapshot(now_ms)
843    }
844
845    /// [CR-7] Import a `GroupStateSnapshot` produced by another device's
846    /// [`Conversation::export_state_snapshot`].
847    ///
848    /// Replays the snapshot's entries into this client's OpenMLS provider, then
849    /// reconstructs the `Conversation` handle via `MlsGroup::load`. After return,
850    /// the conversation is in `list_conversations()` and `send`/`process_envelope`
851    /// work against it normally.
852    ///
853    /// **Scope.** This is for the *same-user* hand-off (linking, recovery). The
854    /// snapshot exposes the exporter's view of past epoch secrets for the target
855    /// group; only call this when the receiving device has been authenticated to
856    /// the same user identity (mnemonic, QR-handshake). Cross-user history transfer
857    /// uses HPKE-sealed AppEvent re-shares (umbrella §15.6), not this method.
858    ///
859    /// **Sanity.** Refuses snapshots whose `group_id` doesn't match the bytes the
860    /// receiver intends to claim — guards against host bugs that shuffle snapshots
861    /// between groups. Refuses mismatched OpenMLS storage versions outright; no
862    /// silent forward/back compatibility.
863    pub async fn import_state_snapshot(
864        self: &Arc<Self>,
865        snapshot_bytes: &[u8],
866        now_ms: u64,
867    ) -> Result<ConversationId> {
868        use crate::device::GroupStateSnapshot;
869        let snap = GroupStateSnapshot::decode(snapshot_bytes)
870            .map_err(|e| Error::Invalid(format!("snapshot decode: {e}")))?;
871
872        if snap.openmls_storage_version != openmls_traits::storage::CURRENT_VERSION {
873            return Err(Error::Invalid(format!(
874                "snapshot openmls_storage_version={} not supported (this SDK supports v={})",
875                snap.openmls_storage_version,
876                openmls_traits::storage::CURRENT_VERSION
877            )));
878        }
879
880        let conv_id = snap.group_id;
881
882        // Refuse if we already have an active handle for this conv — the host should
883        // close it first, otherwise import silently overwrites in-memory state and
884        // the existing handle becomes stale.
885        if self.conversations.read().contains_key(&conv_id) {
886            return Err(Error::Invalid(format!(
887                "conversation {} already open; close before importing snapshot",
888                conv_id.as_hex()
889            )));
890        }
891
892        // Replay raw KV pairs into the provider's working set.
893        let entries: Vec<(Vec<u8>, Vec<u8>)> =
894            snap.entries.into_iter().map(|e| (e.key, e.value)).collect();
895        self.crypto
896            .import_entries(entries)
897            .map_err(|e| Error::Storage(format!("import entries: {e}")))?;
898
899        // Reconstruct the Conversation handle. `Conversation::load` will return
900        // `Ok(None)` if OpenMLS still can't find the group — i.e. our snapshot was
901        // incomplete or for a different storage version.
902        let meta = ConversationMeta {
903            id: conv_id,
904            name: None,
905            epoch: 0, // will be overwritten from the loaded group state in process()
906            member_count: 0,
907            is_device_group: false, // host can flip this via meta update if needed
908            created_at_ms: now_ms,
909        };
910        let convo = Conversation::load(
911            conv_id,
912            meta,
913            SyncCursor::default(),
914            std::collections::BTreeMap::new(),
915            self.local_device.device_id.clone(),
916            self.crypto.clone(),
917            self.signing.clone(),
918            self.storage.clone(),
919            now_ms,
920        )?
921        .ok_or_else(|| {
922            Error::Invalid(
923                "snapshot imported but OpenMLS could not load the group — snapshot may be incomplete or storage version mismatched"
924                    .into(),
925            )
926        })?;
927
928        // Pull the live epoch + member count from the loaded group so the meta we
929        // just stubbed is consistent with what we'll observe on subsequent process_envelope.
930        let live_epoch = convo.epoch();
931        let live_members = convo.group.members().count() as u32;
932        let mut convo = convo;
933        convo.meta.epoch = live_epoch;
934        convo.meta.member_count = live_members;
935        convo.snapshot_to_storage().await?;
936
937        self.conversations.write().insert(conv_id, convo);
938        Ok(conv_id)
939    }
940
941    /// Export a derived secret from one conversation's MLS exporter ([CR-8]).
942    ///
943    /// Thin pass-through to [`Conversation::export_secret`]. See that method's doc comment
944    /// for the contract on `label`, `context`, length validation, and zeroization. The
945    /// returned `Zeroizing<Vec<u8>>` is automatically wiped when dropped.
946    pub fn export_conversation_secret(
947        &self,
948        conv_id: ConversationId,
949        label: &str,
950        context: &[u8],
951        length: usize,
952    ) -> Result<Zeroizing<Vec<u8>>> {
953        let guard = self.conversations.read();
954        let convo = guard
955            .get(&conv_id)
956            .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
957        convo.export_secret(label, context, length)
958    }
959
960    /// Revoke a device by removing its leaf from every conversation where we know its
961    /// position ([CR-2]).
962    ///
963    /// Returns one Commit envelope per conversation the device was a leaf in. The host
964    /// broadcasts each envelope to the affected conversation; the SDK has also already
965    /// handed them to the transport via `transport.send` (idempotent broadcast is the
966    /// host's call).
967    ///
968    /// **Scope.** The SDK can only resolve leaves it recorded itself — either when it
969    /// admitted the device via [`Self::add_members`] or when this device joined as the
970    /// target via Welcome. For peer-admitted devices the leaf index isn't locally known;
971    /// those conversations are silently skipped. The host can fall back to
972    /// `remove_members(leaf_index)` directly using a transport-side directory lookup if
973    /// it needs to revoke from those conversations too. See
974    /// `docs/architecture/multi-device.md §Device removal` for the broader flow.
975    ///
976    /// Conversations with no entry for `device_id` produce no envelope; an empty `Vec`
977    /// return is a valid outcome (e.g. the device was already revoked, or was never
978    /// added by this client).
979    #[allow(clippy::await_holding_lock)] // see add_members for rationale
980    pub async fn revoke_device(
981        &self,
982        device_id: DeviceId,
983        now_ms: u64,
984    ) -> Result<Vec<MessageEnvelope>> {
985        // 1. Walk every open conversation and gather (conv_id, leaf_index) pairs where
986        //    we know `device_id` controls a leaf. Done under a read lock so we don't hold
987        //    the write lock across the per-conversation remove path.
988        let targets: Vec<(ConversationId, u32)> = self
989            .conversations
990            .read()
991            .iter()
992            .filter_map(|(id, c)| c.leaf_index_of(&device_id).map(|leaf| (*id, leaf)))
993            .collect();
994
995        // 2. For each target, emit a remove_members commit. We do this sequentially: each
996        //    one is a separate MLS epoch advance on its own group, and they don't share
997        //    state, so parallel issuance is safe but adds complexity we don't need for v1.
998        let mut envelopes = Vec::with_capacity(targets.len());
999        for (conv_id, leaf_index) in targets {
1000            let envelope = {
1001                let mut guard = self.conversations.write();
1002                let convo = guard
1003                    .get_mut(&conv_id)
1004                    .ok_or_else(|| Error::UnknownConversation(conv_id.as_hex()))?;
1005                convo.remove_members(vec![leaf_index], now_ms)?
1006            };
1007            self.transport.send(envelope.clone()).await?;
1008            if let Some(c) = self.conversations.read().get(&conv_id) {
1009                c.snapshot_to_storage().await?;
1010            }
1011            envelopes.push(envelope);
1012        }
1013
1014        // 3. Notify the auth-layer server so it can invalidate the
1015        //    revoked device's KeyPackage pool, mark `auth.devices.revoked_at`,
1016        //    and refuse any future envelope signed by the revoked device's
1017        //    JWT. Done AFTER the MLS Commits so peers learn via MLS first
1018        //    (the canonical path) and the auth layer is the eventual-
1019        //    consistency cleanup. Transport failures bubble up so callers
1020        //    can retry — but the MLS-side work has already shipped, so
1021        //    the device is functionally revoked in every group; only the
1022        //    auth-layer KeyPackage purge is pending.
1023        self.transport.revoke_device_remote(device_id).await?;
1024        Ok(envelopes)
1025    }
1026}
1027
1028fn device_group_id_for(user_id: &UserId) -> ConversationId {
1029    // Deterministic 16-byte ID derived from the user's id, prefixed so it cannot collide with
1030    // a randomly-generated ULID in normal use (ULIDs start with a millisecond timestamp).
1031    let mut bytes = [0u8; 16];
1032    bytes[0] = 0xFF;
1033    bytes[1] = 0xDC; // "DeviCe" group sentinel
1034    let h = codec::sha256(&user_id.0);
1035    bytes[2..].copy_from_slice(&h[..14]);
1036    ConversationId(bytes)
1037}
1038
1039fn encode_local_device(d: &LocalDevice) -> Result<Vec<u8>> {
1040    use serde::Serialize;
1041    #[derive(Serialize)]
1042    struct Persisted<'a> {
1043        device_id: &'a DeviceId,
1044        label: &'a str,
1045        created_at_ms: u64,
1046        #[serde(with = "serde_bytes")]
1047        signing_seed: &'a [u8],
1048    }
1049    codec::encode(&Persisted {
1050        device_id: &d.device_id,
1051        label: &d.label,
1052        created_at_ms: d.created_at_ms,
1053        signing_seed: d.signing.as_bytes(),
1054    })
1055}
1056
1057fn decode_local_device(bytes: &[u8], user_id: UserId) -> Result<LocalDevice> {
1058    use serde::Deserialize;
1059    #[derive(Deserialize)]
1060    struct Persisted {
1061        device_id: DeviceId,
1062        label: String,
1063        created_at_ms: u64,
1064        #[serde(with = "serde_bytes")]
1065        signing_seed: Vec<u8>,
1066    }
1067    let p: Persisted = codec::decode(bytes)?;
1068    let seed: [u8; 32] = p
1069        .signing_seed
1070        .as_slice()
1071        .try_into()
1072        .map_err(|_| Error::Invalid("device signing seed must be 32 bytes".into()))?;
1073    let signing = ed25519_dalek::SigningKey::from_bytes(&seed);
1074    Ok(LocalDevice {
1075        device_id: p.device_id,
1076        user_id,
1077        label: p.label,
1078        signing,
1079        created_at_ms: p.created_at_ms,
1080    })
1081}