huddle_core/crypto/sas.rs
1//! Short-Authentication-String (SAS) verification — Phase G.
2//!
3//! Two peers OOB-compare a short derived code to confirm they each
4//! hold the matching Ed25519 keys (defense against MITM during initial
5//! contact, before fingerprint trust is established).
6//!
7//! Protocol shape (each step is a signed `RoomMessage` on the room's
8//! gossipsub topic):
9//!
10//! 1. Initiator picks a random 16-byte `tx_id` + an ephemeral X25519
11//! keypair. Sends `SasInit { tx_id, ephemeral_x25519_pubkey, target_fp }`.
12//! 2. Responder generates their own ephemeral X25519 keypair, computes
13//! ECDH with the initiator's pubkey, derives the SAS code via
14//! `derive_sas_code(shared, tx_id)`, and replies with
15//! `SasResponse { tx_id, ephemeral_x25519_pubkey }`. The responder
16//! sees the code locally and shows it.
17//! 3. The initiator computes ECDH the other direction, derives the
18//! same code, shows it.
19//! 4. Both users compare codes OOB. Each side presses Match → broadcasts
20//! `SasConfirm { tx_id, matched: true }`.
21//! 5. On receiving the other side's `matched=true`, set the partner's
22//! fingerprint as `verified=true` (per-room + global `verified_peers`).
23//!
24//! The signatures on each envelope bind the ephemeral X25519 pubkeys to
25//! the sender's Ed25519 identity. A MITM who substitutes their own
26//! ephemeral key into the exchange ends up with a *different* SAS code
27//! than the legitimate peer would compute, so the OOB comparison fails.
28//!
29//! ## SAS table — Matrix MSC 2241 alignment (huddle 0.3.x follow-up)
30//!
31//! Previously the emoji table was a 64-entry "in spirit" derivative;
32//! it has been realigned to the canonical 49-entry Matrix MSC 2241
33//! table so any future cross-client SAS interop just works. The
34//! derivation now uses 7 emoji (42 bits / 6 = 7 chunks, each mod 49)
35//! and 3 four-digit decimal groups (39 bits / 13 = 3 chunks, each
36//! offset +1000 so values land in 1000..9192), exactly as MSC 2241
37//! specifies.
38
39use hkdf::Hkdf;
40use rand::RngCore;
41use sha2::Sha256;
42use x25519_dalek::{PublicKey, StaticSecret};
43
44use crate::error::{HuddleError, Result};
45
46/// Length of the transaction id used as HKDF salt. 16 bytes (128 bits)
47/// is plenty of unforgeability; sized to be base64-friendly.
48pub const TX_ID_LEN: usize = 16;
49
50/// SAS code information given to both sides for OOB comparison.
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct SasCode {
53 /// 7 emoji indices into [`SAS_EMOJI`] (each 0..49). Human-friendly
54 /// for visual comparison; works in any modern terminal with emoji
55 /// support. Matches Matrix MSC 2241 shape.
56 pub emoji_indices: [u8; 7],
57 /// Three 4-digit groups separated by `-`, each in `1000..=9191`,
58 /// per MSC 2241. Easier to read aloud than a flat 7-digit number.
59 pub decimal: String,
60}
61
62impl SasCode {
63 pub fn emoji_string(&self) -> String {
64 self.emoji_indices
65 .iter()
66 .map(|i| SAS_EMOJI[*i as usize].0)
67 .collect::<Vec<_>>()
68 .join(" ")
69 }
70
71 pub fn emoji_labels(&self) -> String {
72 self.emoji_indices
73 .iter()
74 .map(|i| SAS_EMOJI[*i as usize].1)
75 .collect::<Vec<_>>()
76 .join(" / ")
77 }
78}
79
80/// Fresh X25519 ephemeral keypair + random tx_id. The secret stays on
81/// the initiator's machine until the SAS finishes; the pubkey is
82/// transmitted in the signed envelope.
83pub fn new_session() -> ([u8; TX_ID_LEN], StaticSecret, PublicKey) {
84 let mut tx_id = [0u8; TX_ID_LEN];
85 rand::thread_rng().fill_bytes(&mut tx_id);
86 // StaticSecret here is the X25519 "long-term" type from x25519-dalek;
87 // we use it as ephemeral (drop after the SAS). Need the
88 // `static_secrets` feature flag because the `EphemeralSecret` type
89 // is more restrictive in v2 — `StaticSecret` lets us hold onto it
90 // across a few async hops.
91 let secret = StaticSecret::random_from_rng(rand::thread_rng());
92 let public = PublicKey::from(&secret);
93 (tx_id, secret, public)
94}
95
96/// Derive the 7-emoji + 3-group-decimal SAS code from the X25519
97/// shared secret and the agreed-upon `tx_id`. Both peers compute this
98/// independently and must end up with the same answer for OOB
99/// comparison to succeed.
100///
101/// Matches the MSC 2241 derivation: HKDF-SHA256 with `tx_id` as salt
102/// and `b"huddle-sas-v1"` as info, expanded to 11 bytes. First 6 bytes
103/// → 7 6-bit chunks (mod 49) → emoji indices. Next 5 bytes → 3 13-bit
104/// chunks (+ 1000) → 3 four-digit decimal groups.
105pub fn derive_sas_code(
106 our_secret: &StaticSecret,
107 their_public: &PublicKey,
108 tx_id: &[u8; TX_ID_LEN],
109) -> Result<SasCode> {
110 let shared = our_secret.diffie_hellman(their_public);
111 // huddle 1.1.4: reject a non-contributory (small-order) peer ephemeral.
112 // Such a "pubkey" forces a predictable shared secret, which would let a
113 // MITM steer both sides to a derivable SAS code and defeat the OOB
114 // comparison. Honest peers always produce a contributory secret.
115 if !shared.was_contributory() {
116 return Err(HuddleError::Session(
117 "SAS rejected: peer X25519 ephemeral is non-contributory (small-order point)".into(),
118 ));
119 }
120 // HKDF over the shared secret. tx_id as salt prevents replay
121 // (two SAS flows between the same pair must produce different
122 // codes); info domain-separates from any other HKDF use.
123 let hk = Hkdf::<Sha256>::new(Some(tx_id), shared.as_bytes());
124 let mut okm = [0u8; 11];
125 hk.expand(b"huddle-sas-v1", &mut okm)
126 .expect("11 bytes is well within HKDF output limit");
127
128 // First 6 bytes = 48 bits. Use the high 42 bits (7 × 6) for emoji.
129 // Bit extraction (big-endian, MSB-first):
130 let b = &okm[..6];
131 let mut raw_emoji = [0u8; 7];
132 raw_emoji[0] = b[0] >> 2;
133 raw_emoji[1] = ((b[0] & 0x03) << 4) | (b[1] >> 4);
134 raw_emoji[2] = ((b[1] & 0x0f) << 2) | (b[2] >> 6);
135 raw_emoji[3] = b[2] & 0x3f;
136 raw_emoji[4] = b[3] >> 2;
137 raw_emoji[5] = ((b[3] & 0x03) << 4) | (b[4] >> 4);
138 raw_emoji[6] = ((b[4] & 0x0f) << 2) | (b[5] >> 6);
139 // huddle 0.7.11: rejection sampling instead of `raw % 49`.
140 // 6-bit values in 0..64 mod 49 makes indices 0..14 twice as likely
141 // (hit by raw 0..14 AND raw 49..63), measurably under-sampling the
142 // 49^7 SAS space and reducing effective entropy. Now we expand
143 // additional HKDF output to refill any byte that falls in 49..63
144 // — the canonical MSC 2241 approach. The expansion is cheap and
145 // deterministic, so both sides still derive the same code.
146 let emoji_indices = derive_emoji_indices_rejection(&hk, raw_emoji);
147
148 // Bytes 6..11 = 40 bits. Use the high 39 bits for the decimal
149 // (3 × 13-bit chunks, each offset by 1000).
150 let d = &okm[6..11];
151 let chunk0 = ((u32::from(d[0]) << 5) | (u32::from(d[1]) >> 3)) & 0x1fff;
152 let chunk1 = ((u32::from(d[1] & 0x07) << 10)
153 | (u32::from(d[2]) << 2)
154 | (u32::from(d[3]) >> 6))
155 & 0x1fff;
156 let chunk2 = ((u32::from(d[3] & 0x3f) << 7) | (u32::from(d[4]) >> 1)) & 0x1fff;
157 let decimal = format!("{}-{}-{}", chunk0 + 1000, chunk1 + 1000, chunk2 + 1000);
158
159 Ok(SasCode {
160 emoji_indices,
161 decimal,
162 })
163}
164
165/// The canonical 49-emoji table from Matrix MSC 2241, English labels.
166/// Indices 0-48; the derivation above maps 6-bit HKDF chunks mod 49.
167pub const SAS_EMOJI: [(&str, &str); 49] = [
168 ("🐶", "dog"),
169 ("🐱", "cat"),
170 ("🦁", "lion"),
171 ("🐎", "horse"),
172 ("🦄", "unicorn"),
173 ("🐷", "pig"),
174 ("🐘", "elephant"),
175 ("🐰", "rabbit"),
176 ("🐼", "panda"),
177 ("🐓", "rooster"),
178 ("🐧", "penguin"),
179 ("🐢", "turtle"),
180 ("🐟", "fish"),
181 ("🐙", "octopus"),
182 ("🦋", "butterfly"),
183 ("🌷", "flower"),
184 ("🌳", "tree"),
185 ("🌵", "cactus"),
186 ("🍄", "mushroom"),
187 ("🌏", "globe"),
188 ("🌙", "moon"),
189 ("☁️", "cloud"),
190 ("🔥", "fire"),
191 ("🍌", "banana"),
192 ("🍎", "apple"),
193 ("🍓", "strawberry"),
194 ("🌽", "corn"),
195 ("🍕", "pizza"),
196 ("🎂", "cake"),
197 ("❤️", "heart"),
198 ("🙂", "smiley"),
199 ("🤖", "robot"),
200 ("🎩", "hat"),
201 ("👓", "glasses"),
202 ("🔧", "spanner"),
203 ("🎅", "santa"),
204 ("👍", "thumbs up"),
205 ("☂️", "umbrella"),
206 ("⌛", "hourglass"),
207 ("⏰", "clock"),
208 ("🎁", "gift"),
209 ("💡", "light bulb"),
210 ("📕", "book"),
211 ("✏️", "pencil"),
212 ("📎", "paperclip"),
213 ("✂️", "scissors"),
214 ("🔒", "lock"),
215 ("🔑", "key"),
216 ("🔨", "hammer"),
217];
218
219/// huddle 0.7.11: rejection-sampling emoji-index derivation. Refills any
220/// index ≥ 49 with deterministic additional HKDF expansion so the
221/// distribution over the 49-element table is uniform.
222fn derive_emoji_indices_rejection(
223 hk: &Hkdf<Sha256>,
224 initial: [u8; 7],
225) -> [u8; 7] {
226 let mut out = [0u8; 7];
227 let mut accepted = 0usize;
228 // Use the initial bytes first.
229 for &v in &initial {
230 if v < 49 {
231 out[accepted] = v;
232 accepted += 1;
233 if accepted == 7 {
234 return out;
235 }
236 }
237 }
238 // Refill by expanding additional 6-bit chunks. We pull in 6-byte
239 // blocks of HKDF output, each yielding 8 candidate 6-bit values
240 // (high-bit pair discarded — each byte gives one 6-bit candidate
241 // via `v & 0x3f`). The info string includes a salt counter so
242 // multiple refills don't repeat the same bytes.
243 let mut counter: u32 = 0;
244 while accepted < 7 {
245 let info = {
246 let mut buf = [0u8; 24];
247 buf[..16].copy_from_slice(b"huddle-sas-v1-rs");
248 buf[16..20].copy_from_slice(&counter.to_be_bytes());
249 buf
250 };
251 let mut block = [0u8; 32];
252 if hk.expand(&info, &mut block).is_err() {
253 // The expander only fails when len > 255 * HashLen (8160
254 // bytes for SHA-256); 32 is far under, so this branch is
255 // unreachable in practice. Fall back to modulo if it
256 // somehow happens — degrades to pre-0.7.11 behavior but
257 // never panics or hangs.
258 for v in &mut initial.iter().copied() {
259 if accepted < 7 {
260 out[accepted] = v % 49;
261 accepted += 1;
262 }
263 }
264 break;
265 }
266 for &byte in block.iter() {
267 let candidate = byte & 0x3f;
268 if candidate < 49 {
269 out[accepted] = candidate;
270 accepted += 1;
271 if accepted == 7 {
272 return out;
273 }
274 }
275 }
276 counter += 1;
277 }
278 out
279}
280
281/// Decode a base64-encoded 32-byte X25519 pubkey received over the wire.
282pub fn parse_pubkey(b64: &str) -> Result<PublicKey> {
283 use base64::engine::general_purpose::STANDARD as B64;
284 use base64::Engine;
285 let bytes = B64
286 .decode(b64)
287 .map_err(|e| HuddleError::Session(format!("bad x25519 pubkey b64: {e}")))?;
288 if bytes.len() != 32 {
289 return Err(HuddleError::Session(format!(
290 "x25519 pubkey is {} bytes, expected 32",
291 bytes.len()
292 )));
293 }
294 let mut arr = [0u8; 32];
295 arr.copy_from_slice(&bytes);
296 Ok(PublicKey::from(arr))
297}
298
299#[cfg(test)]
300mod tests {
301 use super::*;
302
303 #[test]
304 fn both_sides_derive_same_code() {
305 let (tx_id, alice_secret, alice_pub) = new_session();
306 let (_, bob_secret, bob_pub) = new_session();
307
308 let alice_code = derive_sas_code(&alice_secret, &bob_pub, &tx_id).unwrap();
309 let bob_code = derive_sas_code(&bob_secret, &alice_pub, &tx_id).unwrap();
310 assert_eq!(alice_code, bob_code);
311 // Decimal shape: three 4-digit groups joined by '-', each in
312 // [1000, 9191].
313 let parts: Vec<&str> = alice_code.decimal.split('-').collect();
314 assert_eq!(parts.len(), 3);
315 for p in parts {
316 assert_eq!(p.len(), 4);
317 let n: u32 = p.parse().unwrap();
318 assert!((1000..=9191).contains(&n));
319 }
320 // Indices must all be in 0..49 (MSC 2241 table size).
321 for i in alice_code.emoji_indices {
322 assert!((i as usize) < SAS_EMOJI.len());
323 }
324 }
325
326 #[test]
327 fn different_tx_id_yields_different_code() {
328 let (tx_id_a, alice_secret, _) = new_session();
329 let (_, bob_secret, bob_pub) = new_session();
330 let alice_code = derive_sas_code(&alice_secret, &bob_pub, &tx_id_a).unwrap();
331
332 let mut tx_id_b = tx_id_a;
333 tx_id_b[0] ^= 0xff;
334 let alice_code_b = derive_sas_code(&alice_secret, &bob_pub, &tx_id_b).unwrap();
335 let _ = bob_secret;
336 assert_ne!(alice_code, alice_code_b);
337 }
338
339 #[test]
340 fn mitm_substitute_yields_different_code() {
341 // Mallory MITMs: Alice's traffic to Bob is replaced with
342 // Mallory's pubkey, and vice versa. Alice computes ECDH with
343 // Mallory's pub; Bob computes ECDH with Mallory's pub. Their
344 // SAS codes will both differ from each other and from a
345 // legitimate same-pubkey-pair derivation — so OOB comparison
346 // catches the attack.
347 let (tx_id, alice_secret, alice_pub) = new_session();
348 let (_, bob_secret, bob_pub) = new_session();
349 let (_, _mallory_secret, mallory_pub) = new_session();
350
351 let alice_thinks_bob = derive_sas_code(&alice_secret, &mallory_pub, &tx_id).unwrap();
352 let bob_thinks_alice = derive_sas_code(&bob_secret, &mallory_pub, &tx_id).unwrap();
353 assert_ne!(alice_thinks_bob, bob_thinks_alice);
354
355 // Sanity: without MITM, both sides agree.
356 let alice_real = derive_sas_code(&alice_secret, &bob_pub, &tx_id).unwrap();
357 let bob_real = derive_sas_code(&bob_secret, &alice_pub, &tx_id).unwrap();
358 assert_eq!(alice_real, bob_real);
359 }
360
361 #[test]
362 fn rejects_small_order_ephemeral() {
363 // The X25519 all-zero point is non-contributory (small-order):
364 // ECDH with it yields an all-zero shared secret regardless of our
365 // secret. derive_sas_code must reject it rather than emit a code.
366 let (tx_id, our_secret, _) = new_session();
367 let zero_pub = PublicKey::from([0u8; 32]);
368 assert!(derive_sas_code(&our_secret, &zero_pub, &tx_id).is_err());
369 }
370
371 #[test]
372 fn pubkey_round_trip() {
373 let (_, _, pub_) = new_session();
374 use base64::engine::general_purpose::STANDARD as B64;
375 use base64::Engine;
376 let encoded = B64.encode(pub_.as_bytes());
377 let decoded = parse_pubkey(&encoded).unwrap();
378 assert_eq!(decoded.as_bytes(), pub_.as_bytes());
379 }
380}