ts_runtime/tka_sync.rs
1//! Tailnet-Lock (TKA) chain-sync orchestration: the runtime-layer driver that ties the transport
2//! RPCs (`ts_control::{tka_bootstrap, tka_sync_offer, tka_sync_send}`) to the chain logic
3//! (`ts_tka::{Aum, Authority, MemAumStore, VerifiedAumChain}`), mirroring Go's `tkaSyncIfNeeded`
4//! (`ipn/ipnlocal/tailnet-lock.go`, v1.100.0).
5//!
6//! This lives in `ts_runtime` because it is the only layer that depends on **both** the wire crate
7//! (`ts_control`, which deliberately knows nothing of `ts_tka`) and the chain crate (`ts_tka`). It
8//! converts between the wire forms (base32 head strings, base64'd raw-CBOR AUM bytes) and the domain
9//! types, and drives the two-phase flow:
10//!
11//! 1. **Bootstrap** (only when we hold no chain yet): `tka_bootstrap` fetches the genesis AUM; we
12//! `Aum::from_cbor` it, build the initial [`Authority`] via the **un-bypassable trust boundary**
13//! `VerifiedAumChain::verify` → `Authority::from_verified_chain`, and seed a [`MemAumStore`].
14//! 2. **Sync** (offer → send): compute our [`SyncOffer`], send it, decode the AUMs control says we're
15//! missing, `Inform`-equivalent (verify + fold into a fresh Authority over the grown store), then
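//!    tell control the AUMs *it* is missing. The order follows Go's — we compute what to *send*
//!    with the pre-Inform `Authority` (control's AUMs are already inserted into the store at that
//!    point, but not yet verified into a new `Authority`), then advance.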
18//!
19//! **Posture (this module fails open; the published `Authority` is then ENFORCED).** This is two
20//! distinct claims, kept distinct:
21//!
22//! - *Sync failure is fail-open here*: every failure path in **this** module (a transport error, a
23//! malformed AUM, a verify failure) returns `Ok(None)` or an `Err` that the caller treats as "no
24//! new Authority obtained this round" — a failed *sync* never blocks the netmap and leaves the
25//! prior enforcement state untouched (see `control_runner`'s apply step). It does NOT mean TKA is
26//! observe-only.
27//! - *A successfully synced `Authority` is actively enforced*: once this module returns an
28//! `Authority`, the control runner publishes it to the peer tracker's enforcement cell and the
29//! peer-trust chokepoint fails **closed** — a peer presenting a missing or unauthorized
30//! `key_signature` is **dropped** at the peer-db upsert path (`peer_tracker::tka_snapshot_admits`,
31//! matching Go's `tkaFilterNetmapLocked`). With no lock synced, every peer is admitted (Go's
32//! `b.tka == nil` early return); a control-signalled *disable* clears enforcement back to admit-all.
33//!
34//! The chain always passes through the **un-bypassable trust boundary** `VerifiedAumChain::verify`
35//! before it can reach enforcement, so a malicious control plane cannot forge a trusted key to admit
36//! an unauthorized peer — it can only toggle the lock's enable/disable state. The authoritative
37//! description of the enforcement posture, threat model, and the remaining deferred gaps
38//! (disablement-secret verification, rotation-obsolete/clone-replay dropping) lives in `SECURITY.md`;
39//! keep this doc consistent with it.
40//!
41//! **Do not "simplify" by removing enforcement to match an outdated "observe-only" reading** — that
42//! would silently downgrade a working, fail-closed security control to verify-only.
43
44use std::sync::Arc;
45
46use ts_control::{
47 TkaSyncError, TkaSyncOfferRequest, TkaSyncSendRequest, tka_bootstrap, tka_sync_offer,
48 tka_sync_send,
49};
50use ts_tka::{Aum, AumHash, Authority, MemAumStore, SyncOffer, VerifiedAumChain};
51
52/// The synced TKA state a successful [`sync_tka`] produces: the verified [`Authority`] (for the
53/// verify-and-log consumer) plus the [`MemAumStore`] of AUMs gathered so far (so the next sync can
54/// compute offers/missing-sets without re-bootstrapping).
55pub(crate) struct SyncedTka {
56 pub authority: Arc<Authority>,
57 pub store: MemAumStore,
58 /// The genesis/oldest AUM hash, needed as the `oldest` argument to subsequent `sync_offer`s.
59 pub oldest: AumHash,
60}
61
/// A single row of the Tailnet-Lock update-chain log — the counterpart of Go's
/// `ipnstate.NetworkLockUpdate`, i.e. what `tailscale lock log` prints. Rows are produced by
/// [`Device::tka_log`](crate::Runtime::tka_log) from the locally-synced AUM chain: a pure local
/// read with no control round-trip.
///
/// Three fields map one-to-one onto Go's `NetworkLockUpdate`: `aum_hash` (`Hash`), `change`
/// (`Change`) and `raw` (`Raw`). `signer_key_ids` is an addition of this engine: Go's struct has no
/// `Signatures` field and recovers the signer only by decoding `Raw`, whereas we lift the signer
/// key ids out of the decoded AUM so a daemon need not re-decode. `raw` is still carried so a
/// faithful full decode remains possible.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TkaLogEntry {
    /// Chain-link hash of the AUM (Go `NetworkLockUpdate.Hash`): `BLAKE2s-256` of its
    /// serialization.
    pub aum_hash: [u8; 32],
    /// Human-readable change kind (Go `NetworkLockUpdate.Change`) such as `"add-key"`,
    /// `"remove-key"` or `"checkpoint"` — see [`AumKind::as_str`](ts_tka::AumKind::as_str).
    pub change: String,
    /// One id per trusted key that signed this AUM, i.e. each
    /// [`AumSignature::key_id`](ts_tka::AumSignature::key_id) (for an Ed25519 key, the signer's
    /// 32-byte public key). A convenience extraction with no equivalent in Go's struct.
    pub signer_key_ids: Vec<Vec<u8>>,
    /// Canonical CBOR serialization of the AUM (Go `NetworkLockUpdate.Raw` = `AUM.Serialize()`),
    /// kept so a consumer can faithfully decode the full AUM, signatures included.
    pub raw: Vec<u8>,
}
86
87/// Read up to `limit` entries of the TKA update-chain log from a synced AUM `store`, **head-first**
88/// (newest → oldest), mirroring Go `NetworkLockLog` which walks `Head` back toward genesis.
89///
90/// The store holds the chain genesis→head; [`MemAumStore::linear_chain_from`] yields that
91/// genesis→head order, which we **reverse** to match Go's head→genesis walk before truncating to
92/// `limit`. A pure function over the synced state (no crypto, no mutation, no RPC) so it is unit
93/// testable without standing up an actor. An unwalkable store (genesis missing / cycle) yields an
94/// empty log rather than erroring — the caller's "no readable chain" is an empty history, matching
95/// the no-lock-synced case.
96pub(crate) fn tka_log_entries(
97 store: &MemAumStore,
98 oldest: AumHash,
99 limit: usize,
100) -> Vec<TkaLogEntry> {
101 // genesis→head; an unwalkable store (missing genesis / cycle) → empty log.
102 let chain = store.linear_chain_from(oldest).unwrap_or_default();
103 chain
104 .iter()
105 .rev() // Go walks head→genesis; the store walk is genesis→head.
106 .take(limit)
107 .map(|aum| TkaLogEntry {
108 aum_hash: aum.hash().0,
109 change: aum.message_kind.as_str().to_string(),
110 signer_key_ids: aum.signatures.iter().map(|s| s.key_id.clone()).collect(),
111 raw: aum.serialize(),
112 })
113 .collect()
114}
115
116/// Errors internal to the sync driver. All map to "no Authority obtained" at the caller — the netmap
117/// is never errored and peers are never dropped on any of these.
118#[derive(Debug, thiserror::Error)]
119pub(crate) enum TkaSyncDriverError {
120 /// A transport RPC failed (network / unsupported / HTTP). `Unsupported` means control has no TKA
121 /// endpoint — treat as "inert", not a hard error.
122 #[error("TKA sync RPC failed: {0}")]
123 Rpc(#[from] TkaSyncError),
124 /// An AUM from control failed to decode or verify. Fail-closed: we do NOT advance the Authority.
125 #[error("TKA chain verification failed: {0}")]
126 Chain(#[from] ts_tka::TkaError),
127}
128
129/// Decode a base64-of-CBOR AUM batch (the wire form of `MissingAUMs`) into domain [`Aum`]s.
130/// Fail-closed: a single undecodable AUM rejects the whole batch (we never partially trust).
131fn decode_aums(marshaled: &[Vec<u8>]) -> Result<Vec<Aum>, ts_tka::TkaError> {
132 marshaled.iter().map(|b| Aum::from_cbor(b)).collect()
133}
134
135/// Re-verify a chain (existing store contents + newly-received AUMs) into a fresh [`Authority`],
136/// the `Inform` analog. We replay the full known AUM set through the trust boundary rather than
137/// mutating in place, so the resulting Authority is always one `VerifiedAumChain::verify` proved.
138///
139/// The store's AUMs in linear genesis→head order are what `verify` expects; we reconstruct that order
140/// by walking from the genesis (`oldest`) forward via the store's child links.
141fn rebuild_authority(store: &MemAumStore, oldest: AumHash) -> Result<Authority, ts_tka::TkaError> {
142 let chain = store.linear_chain_from(oldest)?;
143 let verified = VerifiedAumChain::verify(&chain)?;
144 Ok(Authority::from_verified_chain(verified))
145}
146
147/// Run a TKA bootstrap+sync cycle against control.
148///
149/// `current` is our existing synced state (`None` on first run → bootstrap first). Returns
150/// `Ok(Some(SyncedTka))` with the advanced Authority on success, `Ok(None)` when control has no lock
151/// for us (inert), or `Err` on a transport/verify failure (caller stays inert).
152pub(crate) async fn sync_tka(
153 config: &ts_control::Config,
154 keys: &ts_keys::NodeState,
155 current: Option<SyncedTka>,
156) -> Result<Option<SyncedTka>, TkaSyncDriverError> {
157 let control_url = &config.server_url;
158 let allow_http_key_fetch = config.allow_http_key_fetch;
159
160 // Phase 1: bootstrap if we have no chain yet.
161 let (mut store, oldest, mut authority) = match current {
162 Some(s) => (s.store, s.oldest, (*s.authority).clone()),
163 None => {
164 let resp = tka_bootstrap(
165 control_url,
166 keys,
167 String::new(), // no local head yet
168 allow_http_key_fetch,
169 )
170 .await?;
171 if resp.genesis_aum.is_empty() {
172 // Control returned no genesis: TKA is not enabled for us. Stay inert (not an error).
173 return Ok(None);
174 }
175 let genesis = Aum::from_cbor(&resp.genesis_aum)?;
176 let oldest = genesis.hash();
177 let mut store = MemAumStore::new();
178 store.insert(genesis);
179 let authority = rebuild_authority(&store, oldest)?;
180 (store, oldest, authority)
181 }
182 };
183
184 // Phase 2: offer → (decode + inform) → send. Mirror Go's order exactly.
185 let local_offer = authority.sync_offer(&store, oldest)?;
186 let offer_req = TkaSyncOfferRequest {
187 version: Default::default(), // overwritten by the RPC with CURRENT
188 node_key: keys.node_keys.public,
189 head: local_offer.head.to_base32(),
190 ancestors: local_offer
191 .ancestors
192 .iter()
193 .map(|a| a.to_base32())
194 .collect(),
195 };
196 let offer_resp = tka_sync_offer(control_url, keys, offer_req, allow_http_key_fetch).await?;
197
198 // Reconstruct control's offer from the response so we can compute what *control* is missing —
199 // BEFORE we Inform ourselves with control's AUMs (Go computes missing-to-send pre-Inform).
200 let control_offer = parse_offer(&offer_resp.head, &offer_resp.ancestors)?;
201
202 // Decode + insert the AUMs control sent, then rebuild (verify) the advanced Authority.
203 let received = decode_aums(&offer_resp.missing_aums)?;
204 for aum in &received {
205 store.insert(aum.clone());
206 }
207 // Compute what control is missing from the store as it stands (post-insert is fine: missing_aums
208 // is computed against control's offer, and the gather is from our head — inserting control's own
209 // AUMs cannot make us think it lacks them).
210 let to_send = authority
211 .missing_aums(&store, &control_offer, oldest)
212 .unwrap_or_default();
213 // Advance our Authority over the grown store (the Inform analog) — through the trust boundary.
214 authority = rebuild_authority(&store, oldest)?;
215
216 // Phase 3: send control the AUMs it lacks (best-effort; a failure here doesn't undo our advance).
217 let send_req = TkaSyncSendRequest {
218 version: Default::default(),
219 node_key: keys.node_keys.public,
220 head: authority.head().to_base32(),
221 missing_aums: to_send.iter().map(Aum::serialize).collect(),
222 interactive: false,
223 };
224 if let Err(e) = tka_sync_send(control_url, keys, send_req, allow_http_key_fetch).await {
225 // We already advanced locally; control not accepting our AUMs is logged, not fatal.
226 tracing::warn!(error = ?e, "TKA sync/send failed (local Authority already advanced)");
227 }
228
229 Ok(Some(SyncedTka {
230 authority: Arc::new(authority),
231 store,
232 oldest,
233 }))
234}
235
236/// Parse a wire offer (base32 head + ancestors) into a domain [`SyncOffer`]. A malformed base32 hash
237/// is a decode error (fail-closed).
238fn parse_offer(head: &str, ancestors: &[String]) -> Result<SyncOffer, ts_tka::TkaError> {
239 let head = AumHash::from_base32(head).ok_or(ts_tka::TkaError::Decode("bad base32 head"))?;
240 let ancestors = ancestors
241 .iter()
242 .map(|a| AumHash::from_base32(a).ok_or(ts_tka::TkaError::Decode("bad base32 ancestor")))
243 .collect::<Result<Vec<_>, _>>()?;
244 Ok(SyncOffer { head, ancestors })
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// Base32 (no-pad) renderings of 32-byte hashes parse back to the very same hashes — for the
    /// head and for each ancestor, in order.
    #[test]
    fn parse_offer_roundtrips_base32() {
        let head = AumHash([0x11; 32]);
        let first = AumHash([0x22; 32]);
        let second = AumHash([0x33; 32]);
        let wire_ancestors = [first.to_base32(), second.to_base32()];

        let offer = parse_offer(&head.to_base32(), &wire_ancestors).expect("parse");

        assert_eq!(offer.head, head);
        assert_eq!(offer.ancestors, vec![first, second]);
    }

    /// Malformed base32 fails closed with an error (never a panic), whether it sits in the head or
    /// in an ancestor.
    #[test]
    fn parse_offer_rejects_bad_base32() {
        // Head that is not base32 / has the wrong length.
        let bad_head = parse_offer("not valid base32!", &[]);
        assert!(bad_head.is_err());

        // Well-formed head, malformed ancestor.
        let valid_head = AumHash([1u8; 32]).to_base32();
        let bad_ancestors = ["@@@@".to_string()];
        assert!(parse_offer(&valid_head, &bad_ancestors).is_err());
    }

    /// A serialized AUM survives `decode_aums`; a batch containing one garbage blob is rejected as
    /// a whole (fail-closed, never partial trust).
    #[test]
    fn decode_aums_roundtrips_and_rejects_garbage() {
        let noop = Aum {
            message_kind: ts_tka::AumKind::NoOp,
            prev_aum_hash: None,
            key: None,
            key_id: Vec::new(),
            state: None,
            votes: None,
            meta: Vec::new(),
            signatures: Vec::new(),
        };
        let wire = noop.serialize();

        let batch = decode_aums(std::slice::from_ref(&wire)).expect("decode");
        assert_eq!(batch.len(), 1);
        assert_eq!(batch[0].hash(), noop.hash());

        // Good blob followed by garbage → the whole batch errors.
        let garbage: Vec<u8> = vec![0xff, 0x00, 0x13];
        assert!(decode_aums(&[wire, garbage]).is_err());
    }

    // ---- tka_log_entries (PR-A) ----------------------------------------------------------------

    /// Build a test [`AumKey`](ts_tka::AumKey): an Ed25519 key whose public half is derived
    /// deterministically from `seed`, carrying the given `votes`.
    fn test_aum_key(seed: u8, votes: u32) -> ts_tka::AumKey {
        let signing = ed25519_dalek::SigningKey::from_bytes(&[seed; 32]);
        let public = signing.verifying_key().to_bytes().to_vec();
        ts_tka::AumKey {
            kind: ts_tka::KeyKind::Ed25519,
            votes,
            public,
            meta: Vec::new(),
        }
    }

    /// Build a parentless genesis `Checkpoint` AUM that trusts `key`. It is assembled field by
    /// field (not via `new_genesis_checkpoint`) so these tests remain pure ordering/mapping checks
    /// that do not depend on how disablement values are constructed.
    fn genesis_checkpoint(key: ts_tka::AumKey) -> Aum {
        let state = ts_tka::AumState {
            last_aum_hash: None,
            disablement_values: Some(vec![vec![0x11; 32]]),
            keys: Some(vec![key]),
            state_id1: 0,
            state_id2: 0,
        };
        Aum {
            message_kind: ts_tka::AumKind::Checkpoint,
            prev_aum_hash: None,
            key: None,
            key_id: Vec::new(),
            state: Some(state),
            votes: None,
            meta: Vec::new(),
            signatures: Vec::new(),
        }
    }

    /// Build an `AddKey` AUM that adds `key` and links to `parent` as its predecessor.
    fn add_key_child(parent: &Aum, key: ts_tka::AumKey) -> Aum {
        let parent_hash = parent.hash();
        Aum {
            message_kind: ts_tka::AumKind::AddKey,
            prev_aum_hash: Some(parent_hash),
            key: Some(key),
            key_id: Vec::new(),
            state: None,
            votes: None,
            meta: Vec::new(),
            signatures: Vec::new(),
        }
    }

    /// The log comes back **head-first** (Go's `NetworkLockLog` walks head→genesis, the reverse of
    /// the store's genesis→head order), each row carrying the right `change` string, an `aum_hash`
    /// equal to `Aum::hash`, and a `raw` that decodes back to the source AUM.
    #[test]
    fn tka_log_entries_head_first_with_fields() {
        let g = genesis_checkpoint(test_aum_key(1, 1));
        let a1 = add_key_child(&g, test_aum_key(2, 1));
        let a2 = add_key_child(&a1, test_aum_key(3, 1));

        // Scrambled insert order: the log order must come from chain links, not insertion.
        let mut store = MemAumStore::new();
        for aum in [&a1, &a2, &g] {
            store.insert(aum.clone());
        }

        let log = tka_log_entries(&store, g.hash(), 100);
        let head_first = [&a2, &a1, &g];

        // (a) head-first: newest (a2) → genesis (g).
        let got_hashes: Vec<[u8; 32]> = log.iter().map(|row| row.aum_hash).collect();
        let want_hashes: Vec<[u8; 32]> = head_first.iter().map(|aum| aum.hash().0).collect();
        assert_eq!(
            got_hashes, want_hashes,
            "log must be head-first (a2, a1, genesis)"
        );

        // (b) change strings.
        let changes: Vec<&str> = log.iter().map(|row| row.change.as_str()).collect();
        assert_eq!(changes, ["add-key", "add-key", "checkpoint"]);

        // (c) aum_hash == Aum::hash().0, re-checked explicitly for the genesis row.
        assert_eq!(log[2].aum_hash, g.hash().0);

        // (d) raw decodes back to exactly the AUM it was produced from.
        for (aum, row) in head_first.into_iter().zip(&log) {
            let decoded = Aum::from_cbor(&row.raw).expect("raw is canonical AUM CBOR");
            assert_eq!(&decoded, aum, "raw must decode back to the source AUM");
        }
    }

    /// `limit` keeps the most recent `limit` rows, i.e. it truncates from the head.
    #[test]
    fn tka_log_entries_limit_truncates_from_head() {
        let g = genesis_checkpoint(test_aum_key(1, 1));
        let a1 = add_key_child(&g, test_aum_key(2, 1));
        let a2 = add_key_child(&a1, test_aum_key(3, 1));
        let store = MemAumStore::from_aums([g.clone(), a1.clone(), a2.clone()]);

        let log = tka_log_entries(&store, g.hash(), 2);
        assert_eq!(log.len(), 2, "limit caps the row count");

        let got: Vec<[u8; 32]> = log.iter().map(|row| row.aum_hash).collect();
        let newest_two = [a2.hash().0, a1.hash().0];
        assert_eq!(
            got, newest_two,
            "limit keeps the newest entries (head-first)"
        );

        // A limit of zero produces no rows at all.
        let none = tka_log_entries(&store, g.hash(), 0);
        assert!(none.is_empty());
    }

    /// `signer_key_ids` lists the `key_id` of every [`AumSignature`](ts_tka::AumSignature) on the
    /// AUM, in order — the data a daemon renders without re-decoding `raw`.
    #[test]
    fn tka_log_entries_extracts_signer_key_ids() {
        // Sign the genesis with the very key it seeds; `Aum::sign` records the signer's
        // verifying-key bytes as the signature's key_id.
        let sk = ed25519_dalek::SigningKey::from_bytes(&[1u8; 32]);
        let signer_id = sk.verifying_key().to_bytes().to_vec();
        let mut g = genesis_checkpoint(test_aum_key(1, 1));
        g.sign(&sk);
        let store = MemAumStore::from_aums([g.clone()]);

        let log = tka_log_entries(&store, g.hash(), 100);
        assert_eq!(log.len(), 1);
        assert_eq!(
            log[0].signer_key_ids,
            vec![signer_id],
            "signer_key_ids carries each signature's key_id"
        );

        // With no signatures on the AUM there are no signer ids.
        let unsigned = genesis_checkpoint(test_aum_key(2, 1));
        let store2 = MemAumStore::from_aums([unsigned.clone()]);
        let unsigned_log = tka_log_entries(&store2, unsigned.hash(), 100);
        assert!(unsigned_log[0].signer_key_ids.is_empty());
    }

    /// A store that is empty, or that cannot be walked from the requested `oldest`, produces an
    /// empty log (mirroring the no-lock-synced case the actor short-circuits before ever calling
    /// this): a missing genesis is an empty history, never an error.
    #[test]
    fn tka_log_entries_unwalkable_store_is_empty() {
        // Nothing stored: whatever `oldest` is asked for is absent.
        let empty = MemAumStore::new();
        let from_empty = tka_log_entries(&empty, AumHash([0u8; 32]), 100);
        assert!(from_empty.is_empty());

        // Something stored, but not under the requested `oldest` → still empty, no panic/error.
        let g = genesis_checkpoint(test_aum_key(1, 1));
        let store = MemAumStore::from_aums([g]);
        let from_unknown = tka_log_entries(&store, AumHash([0xEE; 32]), 100);
        assert!(from_unknown.is_empty());
    }
}