Skip to main content

ts_runtime/
tka_sync.rs

1//! Tailnet-Lock (TKA) chain-sync orchestration: the runtime-layer driver that ties the transport
2//! RPCs (`ts_control::{tka_bootstrap, tka_sync_offer, tka_sync_send}`) to the chain logic
3//! (`ts_tka::{Aum, Authority, MemAumStore, VerifiedAumChain}`), mirroring Go's `tkaSyncIfNeeded`
4//! (`ipn/ipnlocal/tailnet-lock.go`, v1.100.0).
5//!
6//! This lives in `ts_runtime` because it is the only layer that depends on **both** the wire crate
7//! (`ts_control`, which deliberately knows nothing of `ts_tka`) and the chain crate (`ts_tka`). It
8//! converts between the wire forms (base32 head strings, base64'd raw-CBOR AUM bytes) and the domain
9//! types, and drives the two-phase flow:
10//!
11//! 1. **Bootstrap** (only when we hold no chain yet): `tka_bootstrap` fetches the genesis AUM; we
12//!    `Aum::from_cbor` it, build the initial [`Authority`] via the **un-bypassable trust boundary**
13//!    `VerifiedAumChain::verify` → `Authority::from_verified_chain`, and seed a [`MemAumStore`].
14//! 2. **Sync** (offer → send): compute our [`SyncOffer`], send it, decode the AUMs control says we're
15//!    missing, `Inform`-equivalent (verify + fold into a fresh Authority over the grown store), then
16//!    tell control the AUMs *it* is missing. The order matches Go exactly — we compute what to *send*
17//!    from the pre-Inform store, then advance.
18//!
19//! **Posture (this module fails open; the published `Authority` is then ENFORCED).** This is two
20//! distinct claims, kept distinct:
21//!
22//! - *Sync failure is fail-open here*: every failure path in **this** module (a transport error, a
23//!   malformed AUM, a verify failure) returns `Ok(None)` or an `Err` that the caller treats as "no
24//!   new Authority obtained this round" — a failed *sync* never blocks the netmap and leaves the
25//!   prior enforcement state untouched (see `control_runner`'s apply step). It does NOT mean TKA is
26//!   observe-only.
27//! - *A successfully synced `Authority` is actively enforced*: once this module returns an
28//!   `Authority`, the control runner publishes it to the peer tracker's enforcement cell and the
29//!   peer-trust chokepoint fails **closed** — a peer presenting a missing or unauthorized
30//!   `key_signature` is **dropped** at the peer-db upsert path (`peer_tracker::tka_snapshot_admits`,
31//!   matching Go's `tkaFilterNetmapLocked`). With no lock synced, every peer is admitted (Go's
32//!   `b.tka == nil` early return); a control-signalled *disable* clears enforcement back to admit-all.
33//!
34//! The chain always passes through the **un-bypassable trust boundary** `VerifiedAumChain::verify`
35//! before it can reach enforcement, so a malicious control plane cannot forge a trusted key to admit
36//! an unauthorized peer — it can only toggle the lock's enable/disable state. The authoritative
37//! description of the enforcement posture, threat model, and the remaining deferred gaps
38//! (disablement-secret verification, rotation-obsolete/clone-replay dropping) lives in `SECURITY.md`;
39//! keep this doc consistent with it.
40//!
41//! **Do not "simplify" by removing enforcement to match an outdated "observe-only" reading** — that
42//! would silently downgrade a working, fail-closed security control to verify-only.
43
44use std::sync::Arc;
45
46use ts_control::{
47    TkaSyncError, TkaSyncOfferRequest, TkaSyncSendRequest, tka_bootstrap, tka_sync_offer,
48    tka_sync_send,
49};
50use ts_tka::{Aum, AumHash, Authority, MemAumStore, SyncOffer, VerifiedAumChain};
51
52/// The synced TKA state a successful [`sync_tka`] produces: the verified [`Authority`] (for the
53/// verify-and-log consumer) plus the [`MemAumStore`] of AUMs gathered so far (so the next sync can
54/// compute offers/missing-sets without re-bootstrapping).
55pub(crate) struct SyncedTka {
56    pub authority: Arc<Authority>,
57    pub store: MemAumStore,
58    /// The genesis/oldest AUM hash, needed as the `oldest` argument to subsequent `sync_offer`s.
59    pub oldest: AumHash,
60}
61
/// A single row of the Tailnet-Lock update-chain log — the counterpart of Go's
/// `ipnstate.NetworkLockUpdate`, i.e. what `tailscale lock log` prints. Rows are produced by
/// [`Device::tka_log`](crate::Runtime::tka_log) from the locally-synced AUM chain; building them is
/// a pure local read with no control round-trip.
///
/// Three fields map one-to-one onto Go's struct: `aum_hash` ↔ `Hash`, `change` ↔ `Change`,
/// `raw` ↔ `Raw`. `signer_key_ids` has no Go equivalent (Go's struct has no `Signatures` field and
/// recovers the signer only by decoding `Raw`); it is surfaced here so a daemon need not re-decode,
/// while `raw` is still carried for a faithful full decode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TkaLogEntry {
    /// Chain-link hash of the AUM (Go `NetworkLockUpdate.Hash`): `BLAKE2s-256` of its
    /// serialization.
    pub aum_hash: [u8; 32],
    /// Human-readable kind of change (Go `NetworkLockUpdate.Change`), e.g. `"add-key"`,
    /// `"remove-key"` or `"checkpoint"` — see [`AumKind::as_str`](ts_tka::AumKind::as_str).
    pub change: String,
    /// One id per signature on the AUM, in signature order (each
    /// [`AumSignature::key_id`](ts_tka::AumSignature::key_id); for an Ed25519 key that is the
    /// signer's 32-byte public key). A convenience extraction that Go's struct does not carry.
    pub signer_key_ids: Vec<Vec<u8>>,
    /// Canonical CBOR serialization of the AUM (Go `NetworkLockUpdate.Raw` = `AUM.Serialize()`),
    /// from which a consumer can decode the complete AUM, signatures included.
    pub raw: Vec<u8>,
}
86
87/// Read up to `limit` entries of the TKA update-chain log from a synced AUM `store`, **head-first**
88/// (newest → oldest), mirroring Go `NetworkLockLog` which walks `Head` back toward genesis.
89///
90/// The store holds the chain genesis→head; [`MemAumStore::linear_chain_from`] yields that
91/// genesis→head order, which we **reverse** to match Go's head→genesis walk before truncating to
92/// `limit`. A pure function over the synced state (no crypto, no mutation, no RPC) so it is unit
93/// testable without standing up an actor. An unwalkable store (genesis missing / cycle) yields an
94/// empty log rather than erroring — the caller's "no readable chain" is an empty history, matching
95/// the no-lock-synced case.
96pub(crate) fn tka_log_entries(
97    store: &MemAumStore,
98    oldest: AumHash,
99    limit: usize,
100) -> Vec<TkaLogEntry> {
101    // genesis→head; an unwalkable store (missing genesis / cycle) → empty log.
102    let chain = store.linear_chain_from(oldest).unwrap_or_default();
103    chain
104        .iter()
105        .rev() // Go walks head→genesis; the store walk is genesis→head.
106        .take(limit)
107        .map(|aum| TkaLogEntry {
108            aum_hash: aum.hash().0,
109            change: aum.message_kind.as_str().to_string(),
110            signer_key_ids: aum.signatures.iter().map(|s| s.key_id.clone()).collect(),
111            raw: aum.serialize(),
112        })
113        .collect()
114}
115
116/// Errors internal to the sync driver. All map to "no Authority obtained" at the caller — the netmap
117/// is never errored and peers are never dropped on any of these.
118#[derive(Debug, thiserror::Error)]
119pub(crate) enum TkaSyncDriverError {
120    /// A transport RPC failed (network / unsupported / HTTP). `Unsupported` means control has no TKA
121    /// endpoint — treat as "inert", not a hard error.
122    #[error("TKA sync RPC failed: {0}")]
123    Rpc(#[from] TkaSyncError),
124    /// An AUM from control failed to decode or verify. Fail-closed: we do NOT advance the Authority.
125    #[error("TKA chain verification failed: {0}")]
126    Chain(#[from] ts_tka::TkaError),
127}
128
129/// Decode a base64-of-CBOR AUM batch (the wire form of `MissingAUMs`) into domain [`Aum`]s.
130/// Fail-closed: a single undecodable AUM rejects the whole batch (we never partially trust).
131fn decode_aums(marshaled: &[Vec<u8>]) -> Result<Vec<Aum>, ts_tka::TkaError> {
132    marshaled.iter().map(|b| Aum::from_cbor(b)).collect()
133}
134
135/// Re-verify a chain (existing store contents + newly-received AUMs) into a fresh [`Authority`],
136/// the `Inform` analog. We replay the full known AUM set through the trust boundary rather than
137/// mutating in place, so the resulting Authority is always one `VerifiedAumChain::verify` proved.
138///
139/// The store's AUMs in linear genesis→head order are what `verify` expects; we reconstruct that order
140/// by walking from the genesis (`oldest`) forward via the store's child links.
141fn rebuild_authority(store: &MemAumStore, oldest: AumHash) -> Result<Authority, ts_tka::TkaError> {
142    let chain = store.linear_chain_from(oldest)?;
143    let verified = VerifiedAumChain::verify(&chain)?;
144    Ok(Authority::from_verified_chain(verified))
145}
146
147/// Run a TKA bootstrap+sync cycle against control.
148///
149/// `current` is our existing synced state (`None` on first run → bootstrap first). Returns
150/// `Ok(Some(SyncedTka))` with the advanced Authority on success, `Ok(None)` when control has no lock
151/// for us (inert), or `Err` on a transport/verify failure (caller stays inert).
152pub(crate) async fn sync_tka(
153    config: &ts_control::Config,
154    keys: &ts_keys::NodeState,
155    current: Option<SyncedTka>,
156) -> Result<Option<SyncedTka>, TkaSyncDriverError> {
157    let control_url = &config.server_url;
158    let allow_http_key_fetch = config.allow_http_key_fetch;
159
160    // Phase 1: bootstrap if we have no chain yet.
161    let (mut store, oldest, mut authority) = match current {
162        Some(s) => (s.store, s.oldest, (*s.authority).clone()),
163        None => {
164            let resp = tka_bootstrap(
165                control_url,
166                keys,
167                String::new(), // no local head yet
168                allow_http_key_fetch,
169            )
170            .await?;
171            if resp.genesis_aum.is_empty() {
172                // Control returned no genesis: TKA is not enabled for us. Stay inert (not an error).
173                return Ok(None);
174            }
175            let genesis = Aum::from_cbor(&resp.genesis_aum)?;
176            let oldest = genesis.hash();
177            let mut store = MemAumStore::new();
178            store.insert(genesis);
179            let authority = rebuild_authority(&store, oldest)?;
180            (store, oldest, authority)
181        }
182    };
183
184    // Phase 2: offer → (decode + inform) → send. Mirror Go's order exactly.
185    let local_offer = authority.sync_offer(&store, oldest)?;
186    let offer_req = TkaSyncOfferRequest {
187        version: Default::default(), // overwritten by the RPC with CURRENT
188        node_key: keys.node_keys.public,
189        head: local_offer.head.to_base32(),
190        ancestors: local_offer
191            .ancestors
192            .iter()
193            .map(|a| a.to_base32())
194            .collect(),
195    };
196    let offer_resp = tka_sync_offer(control_url, keys, offer_req, allow_http_key_fetch).await?;
197
198    // Reconstruct control's offer from the response so we can compute what *control* is missing —
199    // BEFORE we Inform ourselves with control's AUMs (Go computes missing-to-send pre-Inform).
200    let control_offer = parse_offer(&offer_resp.head, &offer_resp.ancestors)?;
201
202    // Decode + insert the AUMs control sent, then rebuild (verify) the advanced Authority.
203    let received = decode_aums(&offer_resp.missing_aums)?;
204    for aum in &received {
205        store.insert(aum.clone());
206    }
207    // Compute what control is missing from the store as it stands (post-insert is fine: missing_aums
208    // is computed against control's offer, and the gather is from our head — inserting control's own
209    // AUMs cannot make us think it lacks them).
210    let to_send = authority
211        .missing_aums(&store, &control_offer, oldest)
212        .unwrap_or_default();
213    // Advance our Authority over the grown store (the Inform analog) — through the trust boundary.
214    authority = rebuild_authority(&store, oldest)?;
215
216    // Phase 3: send control the AUMs it lacks (best-effort; a failure here doesn't undo our advance).
217    let send_req = TkaSyncSendRequest {
218        version: Default::default(),
219        node_key: keys.node_keys.public,
220        head: authority.head().to_base32(),
221        missing_aums: to_send.iter().map(Aum::serialize).collect(),
222        interactive: false,
223    };
224    if let Err(e) = tka_sync_send(control_url, keys, send_req, allow_http_key_fetch).await {
225        // We already advanced locally; control not accepting our AUMs is logged, not fatal.
226        tracing::warn!(error = ?e, "TKA sync/send failed (local Authority already advanced)");
227    }
228
229    Ok(Some(SyncedTka {
230        authority: Arc::new(authority),
231        store,
232        oldest,
233    }))
234}
235
236/// Parse a wire offer (base32 head + ancestors) into a domain [`SyncOffer`]. A malformed base32 hash
237/// is a decode error (fail-closed).
238fn parse_offer(head: &str, ancestors: &[String]) -> Result<SyncOffer, ts_tka::TkaError> {
239    let head = AumHash::from_base32(head).ok_or(ts_tka::TkaError::Decode("bad base32 head"))?;
240    let ancestors = ancestors
241        .iter()
242        .map(|a| AumHash::from_base32(a).ok_or(ts_tka::TkaError::Decode("bad base32 ancestor")))
243        .collect::<Result<Vec<_>, _>>()?;
244    Ok(SyncOffer { head, ancestors })
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    // ---- shared fixtures -----------------------------------------------------------------------

    /// An AUM of the given `kind` linked to `prev`, with every other field left empty (no key, no
    /// state, no votes, no metadata, no signatures). The other fixtures fill in what they need.
    fn bare_aum(kind: ts_tka::AumKind, prev: Option<AumHash>) -> Aum {
        Aum {
            message_kind: kind,
            prev_aum_hash: prev,
            key: None,
            key_id: Vec::new(),
            state: None,
            votes: None,
            meta: Vec::new(),
            signatures: Vec::new(),
        }
    }

    /// A test [`AumKey`](ts_tka::AumKey) from a seed byte (deterministic public key + given votes).
    fn test_aum_key(seed: u8, votes: u32) -> ts_tka::AumKey {
        let signing = ed25519_dalek::SigningKey::from_bytes(&[seed; 32]);
        let public = signing.verifying_key().to_bytes().to_vec();
        ts_tka::AumKey {
            kind: ts_tka::KeyKind::Ed25519,
            votes,
            public,
            meta: Vec::new(),
        }
    }

    /// A parentless genesis `Checkpoint` AUM whose state trusts `key`. It is assembled by hand
    /// (not via `new_genesis_checkpoint`) so these tests remain pure ordering/mapping checks that
    /// do not depend on how disablement values are constructed.
    fn genesis_checkpoint(key: ts_tka::AumKey) -> Aum {
        let state = ts_tka::AumState {
            last_aum_hash: None,
            disablement_values: Some(vec![vec![0x11; 32]]),
            keys: Some(vec![key]),
            state_id1: 0,
            state_id2: 0,
        };
        Aum {
            state: Some(state),
            ..bare_aum(ts_tka::AumKind::Checkpoint, None)
        }
    }

    /// An `AddKey` child of `parent` adding `key`.
    fn add_key_child(parent: &Aum, key: ts_tka::AumKey) -> Aum {
        Aum {
            key: Some(key),
            ..bare_aum(ts_tka::AumKind::AddKey, Some(parent.hash()))
        }
    }

    /// The chain used by the ordering tests, in genesis→head order:
    /// `[genesis, add-key, add-key]`.
    fn three_link_chain() -> [Aum; 3] {
        let genesis = genesis_checkpoint(test_aum_key(1, 1));
        let first = add_key_child(&genesis, test_aum_key(2, 1));
        let second = add_key_child(&first, test_aum_key(3, 1));
        [genesis, first, second]
    }

    // ---- parse_offer / decode_aums -------------------------------------------------------------

    #[test]
    fn parse_offer_roundtrips_base32() {
        // Encode a head and two ancestors to base32, parse them, and expect the same hashes back.
        let head = AumHash([0x11; 32]);
        let ancestors = [AumHash([0x22; 32]), AumHash([0x33; 32])];
        let wire_ancestors: Vec<String> = ancestors.iter().map(|h| h.to_base32()).collect();

        let parsed = parse_offer(&head.to_base32(), &wire_ancestors).expect("parse");

        assert_eq!(parsed.head, head);
        assert_eq!(parsed.ancestors, ancestors.to_vec());
    }

    #[test]
    fn parse_offer_rejects_bad_base32() {
        // A head that is not valid base32 must come back as an error, not a panic.
        let bad_head = parse_offer("not valid base32!", &[]);
        assert!(bad_head.is_err());

        // A well-formed head does not rescue a malformed ancestor.
        let valid_head = AumHash([1u8; 32]).to_base32();
        let bad_ancestor = parse_offer(&valid_head, &["@@@@".to_string()]);
        assert!(bad_ancestor.is_err());
    }

    #[test]
    fn decode_aums_roundtrips_and_rejects_garbage() {
        // Serializing an AUM and decoding it as a one-element batch yields an AUM with the same
        // hash.
        let noop = bare_aum(ts_tka::AumKind::NoOp, None);
        let wire = noop.serialize();
        let batch = decode_aums(std::slice::from_ref(&wire)).expect("decode");
        assert_eq!(batch.len(), 1);
        assert_eq!(batch[0].hash(), noop.hash());

        // Fail-closed: one garbage blob next to a valid one rejects the entire batch.
        let poisoned = [wire, vec![0xff, 0x00, 0x13]];
        assert!(decode_aums(&poisoned).is_err());
    }

    // ---- tka_log_entries (PR-A) ----------------------------------------------------------------

    /// The log comes back **head-first** (Go `NetworkLockLog` walks head→genesis, the reverse of
    /// the store's genesis→head order), each row carries the right `change` string and an
    /// `aum_hash` equal to `Aum::hash`, and `raw` decodes back to the source AUM.
    #[test]
    fn tka_log_entries_head_first_with_fields() {
        let [genesis, first, second] = three_link_chain();
        // Scrambled insert order: the log order must come from chain links, not insertion.
        let mut store = MemAumStore::new();
        for aum in [&first, &second, &genesis] {
            store.insert(aum.clone());
        }

        let log = tka_log_entries(&store, genesis.hash(), 100);
        let expected = [&second, &first, &genesis];

        // (a) head-first: newest → genesis.
        let got_hashes: Vec<[u8; 32]> = log.iter().map(|row| row.aum_hash).collect();
        let want_hashes: Vec<[u8; 32]> = expected.iter().map(|aum| aum.hash().0).collect();
        assert_eq!(
            got_hashes, want_hashes,
            "log must be head-first (a2, a1, genesis)"
        );
        // (b) change strings.
        let changes: Vec<&str> = log.iter().map(|row| row.change.as_str()).collect();
        assert_eq!(changes, vec!["add-key", "add-key", "checkpoint"]);
        // (c) aum_hash == Aum::hash().0, re-checked explicitly for the genesis row.
        assert_eq!(log[2].aum_hash, genesis.hash().0);
        // (d) raw decodes back to exactly the AUM the row was built from.
        for (row, aum) in log.iter().zip(expected) {
            let decoded = Aum::from_cbor(&row.raw).expect("raw is canonical AUM CBOR");
            assert_eq!(&decoded, aum, "raw must decode back to the source AUM");
        }
    }

    /// `limit` truncates from the head (the most recent `limit` entries).
    #[test]
    fn tka_log_entries_limit_truncates_from_head() {
        let [genesis, first, second] = three_link_chain();
        let store = MemAumStore::from_aums([genesis.clone(), first.clone(), second.clone()]);

        let capped = tka_log_entries(&store, genesis.hash(), 2);
        assert_eq!(capped.len(), 2, "limit caps the row count");
        let capped_hashes: Vec<_> = capped.iter().map(|row| row.aum_hash).collect();
        assert_eq!(
            capped_hashes,
            vec![second.hash().0, first.hash().0],
            "limit keeps the newest entries (head-first)"
        );

        // A limit of zero yields no rows at all.
        assert!(tka_log_entries(&store, genesis.hash(), 0).is_empty());
    }

    /// `signer_key_ids` is the `key_id` of each [`AumSignature`](ts_tka::AumSignature) on the AUM,
    /// in order — what a daemon renders without re-decoding `raw`.
    #[test]
    fn tka_log_entries_extracts_signer_key_ids() {
        // Sign the genesis with the key it seeds; the expected id is that key's verifying-key
        // bytes.
        let signing = ed25519_dalek::SigningKey::from_bytes(&[1u8; 32]);
        let expected_id = signing.verifying_key().to_bytes().to_vec();
        let mut signed = genesis_checkpoint(test_aum_key(1, 1));
        signed.sign(&signing);
        let signed_store = MemAumStore::from_aums([signed.clone()]);

        let log = tka_log_entries(&signed_store, signed.hash(), 100);
        assert_eq!(log.len(), 1);
        assert_eq!(
            log[0].signer_key_ids,
            vec![expected_id],
            "signer_key_ids carries each signature's key_id"
        );

        // With no signatures on the AUM, the row has no signer ids.
        let unsigned = genesis_checkpoint(test_aum_key(2, 1));
        let unsigned_store = MemAumStore::from_aums([unsigned.clone()]);
        let unsigned_log = tka_log_entries(&unsigned_store, unsigned.hash(), 100);
        assert!(unsigned_log[0].signer_key_ids.is_empty());
    }

    /// A store that cannot be walked from `oldest` produces an empty log — never an error or a
    /// panic. This mirrors the no-lock-synced case, which the actor short-circuits before it ever
    /// calls this function.
    #[test]
    fn tka_log_entries_unwalkable_store_is_empty() {
        // Nothing in the store at all: whatever `oldest` is, it is absent.
        let empty = MemAumStore::new();
        assert!(tka_log_entries(&empty, AumHash([0u8; 32]), 100).is_empty());

        // A populated store queried with an `oldest` it does not contain.
        let populated = MemAumStore::from_aums([genesis_checkpoint(test_aum_key(1, 1))]);
        assert!(tka_log_entries(&populated, AumHash([0xEE; 32]), 100).is_empty());
    }
}