Skip to main content

cellos_supervisor/resolver_refresh/
rebinding.rs

1//! SEC-21 Phase 3e — per-cell DNS rebinding state tracker.
2//!
3//! Pure data, no I/O. The [`RebindingState`] holds a per-hostname history of
4//! every distinct IP ever observed across all refresh ticks for the cell's
5//! lifetime. On each tick the ticker calls [`RebindingState::evaluate`] BEFORE
6//! emitting the standard `dns_authority_drift` event, gets back a
7//! [`RebindingDecision`] describing:
8//!
9//! 1. Which IPs in the new response are net-new (`novel_ips`).
10//! 2. Whether adding them would push the cumulative distinct-IP count above
11//!    `policy.max_novel_ips_per_hostname` (`threshold_exceeded`).
12//! 3. Which IPs violate the operator-declared allowlist (`allowlist_violations`).
13//! 4. The `effective_targets` the workload should see — equal to
14//!    `new_targets` in audit-only mode (`reject_on_rebind=false`); filtered to
15//!    drop allowlist violations and over-cap novel IPs in enforcement mode
16//!    (`reject_on_rebind=true`).
17//!
18//! The ticker emits `dns_authority_rebind_threshold` and
19//! `dns_authority_rebind_rejected` events from the decision, then calls
20//! [`RebindingState::commit`] to persist the new observation.
21//!
22//! Combined with the P3a TTL floor (`refresh_policy.min_ttl_seconds`), the
23//! P3e tracker structurally closes the v0.4.0 honest residual "DNS rebinding
24//! (TTL=0 / fast-flux) — resolver-side mitigation territory". The TTL floor
25//! limits how often responses can change; the per-hostname IP tracker limits
26//! what they can change to.
27//!
//! ## Allowlist format
//!
//! [`DnsRebindingPolicy::response_ip_allowlist`] entries are
//! `hostname:ip-or-cidr` strings. Each entry is split on the first `:`: the
//! prefix identifies the hostname, and the suffix is either an IPv4/IPv6
//! literal or a CIDR prefix (e.g. `203.0.113.0/24`). Schema-side validation
//! should pre-filter malformed entries; this module silently skips entries it
//! can't parse so resolver-refresh never crashes on user input.
//!
//! An empty allowlist disables allowlist checking entirely. A non-empty
//! allowlist that has no entry for a given hostname is fail-closed: every IP
//! returned for that hostname is reported as a violation.
36
37use std::collections::{HashMap, HashSet};
38use std::net::IpAddr;
39use std::str::FromStr;
40
41use ipnet::IpNet;
42
43use cellos_core::DnsRebindingPolicy;
44
/// Cumulative record, keyed by hostname, of each distinct IP that has been
/// committed for that hostname over the cell's lifetime.
///
/// The [`super::ticker::TickerHandle`] owns this (through the spawned task's
/// internal state), which keeps observations alive from one refresh tick to
/// the next without letting them escape the ticker's lifetime. A new cell
/// starts from an empty state.
#[derive(Clone, Debug, Default)]
pub struct RebindingState {
    /// Maps a hostname to the distinct IPs recorded for it; each list keeps
    /// the order in which the IPs were first recorded.
    histories: HashMap<String, Vec<String>>,
}
56
/// Outcome of one per-hostname [`RebindingState::evaluate`] call.
///
/// The IP-list fields borrow from the `new_targets` slice passed to
/// `evaluate`, so iterating them costs no extra string allocations.
/// `effective_targets` is owned because filtering has to build a new
/// `Vec<String>` anyway.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log a
/// decision and tests can compare whole decisions directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RebindingDecision<'a> {
    /// IPs from the new response that are net-new (not in prior history),
    /// in the order they appeared in `new_targets`.
    pub novel_ips: Vec<&'a str>,
    /// True when at least one novel IP exists AND
    /// `policy.max_novel_ips_per_hostname` would be exceeded after adding
    /// them. Caller emits a `dns_authority_rebind_threshold` event per
    /// hostname (one per tick, regardless of how many novel IPs).
    pub threshold_exceeded: bool,
    /// IPs that fail `policy.response_ip_allowlist` (when set), in the
    /// order they appeared in `new_targets`. Caller emits one
    /// `dns_authority_rebind_rejected` event per IP. Empty when the
    /// allowlist is empty (allowlist enforcement is opt-in).
    pub allowlist_violations: Vec<&'a str>,
    /// The resolved-target set the workload should see.
    ///
    /// - When `policy.reject_on_rebind == false` (audit-only): equal to
    ///   `new_targets`. Events fire but the IPs remain in the workload's
    ///   resolution.
    /// - When `policy.reject_on_rebind == true`: filtered to drop
    ///   (a) every IP in `allowlist_violations` AND
    ///   (b) novel IPs that push the cumulative count past
    ///   `policy.max_novel_ips_per_hostname`. The trailing novel IPs are
    ///   the ones dropped — the IPs observed first are kept, on the
    ///   assumption that legitimate CDN rotation looks like a small
    ///   steady set.
    pub effective_targets: Vec<String>,
}
91
92impl RebindingState {
93    /// Create an empty state. Callers reuse a single instance for the
94    /// lifetime of the cell so prior observations persist.
95    #[must_use]
96    pub fn new() -> Self {
97        Self::default()
98    }
99
100    /// Number of hostnames currently tracked. Test affordance / metrics.
101    #[must_use]
102    pub fn hostname_count(&self) -> usize {
103        self.histories.len()
104    }
105
106    /// Distinct-IP history for `hostname`, or empty when no observation has
107    /// been committed yet. Test affordance / introspection.
108    #[must_use]
109    pub fn history(&self, hostname: &str) -> &[String] {
110        self.histories
111            .get(hostname)
112            .map(|v| v.as_slice())
113            .unwrap_or(&[])
114    }
115
116    /// Evaluate `new_targets` for `hostname` against the per-hostname
117    /// history and the operator's [`DnsRebindingPolicy`].
118    ///
119    /// Pure function — DOES NOT mutate state. Caller emits events from the
120    /// returned [`RebindingDecision`], then calls [`Self::commit`] to
121    /// persist the observation (the commit takes the EFFECTIVE targets so
122    /// the history reflects what the workload actually saw).
123    ///
124    /// Semantics:
125    ///
126    /// - `novel_ips = new_targets - history[hostname]`. Order preserved
127    ///   from `new_targets`.
128    /// - `threshold_exceeded = history[hostname].len() + novel_ips.len()`
129    ///   strictly greater than `policy.max_novel_ips_per_hostname`. False
130    ///   when no novel IPs exist (a steady CDN with churn within the prior
131    ///   history is fine).
132    /// - `allowlist_violations` is empty when `policy.response_ip_allowlist`
133    ///   is empty (allowlist enforcement is strictly opt-in). Otherwise,
134    ///   each IP in `new_targets` is checked against the parsed allowlist
135    ///   filtered to entries with this `hostname` prefix; IPs failing all
136    ///   entries are violations.
137    /// - `effective_targets` is `new_targets` verbatim when
138    ///   `policy.reject_on_rebind == false`. Otherwise, allowlist
139    ///   violations AND over-cap novel IPs are filtered out.
140    pub fn evaluate<'a>(
141        &self,
142        hostname: &str,
143        new_targets: &'a [String],
144        policy: &DnsRebindingPolicy,
145    ) -> RebindingDecision<'a> {
146        let prior: HashSet<&String> = self
147            .histories
148            .get(hostname)
149            .map(|v| v.iter().collect())
150            .unwrap_or_default();
151
152        // Build novel_ips deterministically (order from new_targets), but
153        // de-dupe within the response itself so a response that contains
154        // [1.1.1.1, 1.1.1.1, 1.0.0.1] doesn't double-count the dupe.
155        let mut novel_ips: Vec<&str> = Vec::new();
156        let mut novel_seen: HashSet<&str> = HashSet::new();
157        for ip in new_targets {
158            let s: &str = ip.as_str();
159            if !prior.iter().any(|p| p.as_str() == s) && novel_seen.insert(s) {
160                novel_ips.push(s);
161            }
162        }
163
164        let prior_len = prior.len() as u64;
165        let novel_len = novel_ips.len() as u64;
166        let cap = u64::from(policy.max_novel_ips_per_hostname);
167        // Threshold fires only when there is something novel AND adding it
168        // would push the cumulative distinct-IP count strictly past the
169        // cap. A steady CDN with churn within prior history (no novel IPs)
170        // is silent.
171        let threshold_exceeded = novel_len > 0 && (prior_len.saturating_add(novel_len)) > cap;
172
173        // Allowlist evaluation — only when allowlist is non-empty. We parse
174        // every entry once per call (allowlist is small in practice; the
175        // cell's spec usually declares a few dozen at most) and filter by
176        // hostname prefix. An IP is a violation if it fails ALL applicable
177        // entries; when no entries apply to this hostname, EVERY IP in the
178        // response is a violation (operator declared an allowlist that
179        // doesn't cover this hostname → fail-closed for the hostname).
180        let mut allowlist_violations: Vec<&str> = Vec::new();
181        if !policy.response_ip_allowlist.is_empty() {
182            let entries = parse_allowlist_for_hostname(&policy.response_ip_allowlist, hostname);
183            for ip_str in new_targets {
184                let s: &str = ip_str.as_str();
185                if !ip_in_allowlist(s, &entries) {
186                    allowlist_violations.push(s);
187                }
188            }
189        }
190
191        // Effective targets — verbatim in audit mode; filtered in enforce
192        // mode. Filtering: drop (a) allowlist violations AND (b) novel IPs
193        // beyond the cap (i.e. the LAST `novel_len - keep_novel` novel IPs
194        // in `novel_ips`).
195        let effective_targets: Vec<String> = if policy.reject_on_rebind {
196            // Compute the set of dropped novel IPs (over the cap).
197            let mut dropped_novel: HashSet<&str> = HashSet::new();
198            if threshold_exceeded {
199                // How many novel IPs may we keep before exceeding cap?
200                // keep_novel = max(0, cap - prior_len). Saturating because
201                // prior_len could already be > cap (operator lowered the cap
202                // mid-cell run, or never set it from default 4 and the
203                // cell already churned past).
204                let keep_novel = cap.saturating_sub(prior_len) as usize;
205                for &novel in novel_ips.iter().skip(keep_novel) {
206                    dropped_novel.insert(novel);
207                }
208            }
209            let dropped_allowlist: HashSet<&str> = allowlist_violations.iter().copied().collect();
210
211            new_targets
212                .iter()
213                .filter(|t| {
214                    !dropped_novel.contains(t.as_str()) && !dropped_allowlist.contains(t.as_str())
215                })
216                .cloned()
217                .collect()
218        } else {
219            new_targets.to_vec()
220        };
221
222        RebindingDecision {
223            novel_ips,
224            threshold_exceeded,
225            allowlist_violations,
226            effective_targets,
227        }
228    }
229
230    /// Persist the current observation. Must be called AFTER the caller has
231    /// emitted any threshold/rejected events so the state reflects the
232    /// post-tick view.
233    ///
234    /// Takes the EFFECTIVE targets (post-rejection) so the history reflects
235    /// what the workload actually saw. In audit-only mode the effective
236    /// targets equal the raw response, so commit is functionally equivalent
237    /// to "remember everything we observed."
238    pub fn commit(&mut self, hostname: &str, effective_targets: &[String]) {
239        let history = self.histories.entry(hostname.to_string()).or_default();
240        for t in effective_targets {
241            if !history.iter().any(|h| h == t) {
242                history.push(t.clone());
243            }
244        }
245    }
246}
247
248/// Parsed allowlist entry: a hostname-prefix and an IP-or-CIDR matcher.
249#[derive(Debug)]
250enum AllowlistMatcher {
251    /// Exact IPv4/IPv6 literal (no prefix length).
252    Ip(IpAddr),
253    /// CIDR network — checked via `IpNet::contains`.
254    Net(IpNet),
255}
256
257/// Parse the operator's `response_ip_allowlist` into matchers applicable to
258/// `hostname`. Skips malformed entries silently. Pure — no I/O, no panic.
259fn parse_allowlist_for_hostname(entries: &[String], hostname: &str) -> Vec<AllowlistMatcher> {
260    let mut out: Vec<AllowlistMatcher> = Vec::new();
261    for raw in entries {
262        // Format is `hostname:ip-or-cidr`. Split on FIRST `:` so an IPv6
263        // literal in the suffix (which contains its own `:`s) survives.
264        let Some((prefix, suffix)) = raw.split_once(':') else {
265            continue; // malformed, no separator
266        };
267        if prefix != hostname {
268            continue; // entry applies to a different hostname
269        }
270        let suffix = suffix.trim();
271        if suffix.is_empty() {
272            continue; // malformed, empty matcher
273        }
274        if suffix.contains('/') {
275            if let Ok(net) = IpNet::from_str(suffix) {
276                out.push(AllowlistMatcher::Net(net));
277            }
278            // else: silently drop — schema validation should pre-filter,
279            // resolver-refresh never crashes on operator input.
280        } else if let Ok(ip) = IpAddr::from_str(suffix) {
281            out.push(AllowlistMatcher::Ip(ip));
282        }
283        // else: silently drop malformed literal.
284    }
285    out
286}
287
288/// Test whether `ip_str` matches any entry in `entries`. Returns false when
289/// `entries` is empty (caller is responsible for the "allowlist not
290/// applicable to this hostname" semantic — empty `entries` here ALWAYS
291/// means the IP fails). Also returns false when `ip_str` is not a valid IP
292/// literal — defensive against resolvers returning hostname-typed targets.
293fn ip_in_allowlist(ip_str: &str, entries: &[AllowlistMatcher]) -> bool {
294    let Ok(ip) = IpAddr::from_str(ip_str) else {
295        return false;
296    };
297    entries.iter().any(|e| match e {
298        AllowlistMatcher::Ip(matcher) => *matcher == ip,
299        AllowlistMatcher::Net(net) => net.contains(&ip),
300    })
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Hostname shared by the tests that exercise a realistic name.
    const API_HOST: &str = "api.example.com";

    /// Policy exactly as `DnsRebindingPolicy::default()` builds it.
    fn policy_default() -> DnsRebindingPolicy {
        DnsRebindingPolicy::default()
    }

    /// Policy with every field spelled out by the test.
    fn policy_with(max: u32, reject: bool, allowlist: Vec<&str>) -> DnsRebindingPolicy {
        let response_ip_allowlist: Vec<String> =
            allowlist.into_iter().map(str::to_owned).collect();
        DnsRebindingPolicy {
            response_ip_allowlist,
            max_novel_ips_per_hostname: max,
            reject_on_rebind: reject,
        }
    }

    /// Turn string literals into the owned `Vec<String>` the API expects.
    fn s(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    // ------------------------------------------------------------
    // Novel-IP detection.
    // ------------------------------------------------------------

    #[test]
    fn evaluate_returns_all_novel_when_first_observation() {
        let tracker = RebindingState::new();
        let policy = policy_default();
        let response = s(&["1.1.1.1", "1.0.0.1"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.novel_ips, ["1.1.1.1", "1.0.0.1"]);
    }

    #[test]
    fn evaluate_returns_no_novel_when_repeat_observation() {
        let mut tracker = RebindingState::new();
        tracker.commit(API_HOST, &s(&["1.1.1.1", "1.0.0.1"]));
        let policy = policy_default();
        let response = s(&["1.1.1.1", "1.0.0.1"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.novel_ips.len(), 0);
        assert_eq!(outcome.threshold_exceeded, false);
    }

    // ------------------------------------------------------------
    // Threshold semantics — boundary checks.
    // ------------------------------------------------------------

    #[test]
    fn evaluate_threshold_exceeded_above_max_novel_ips() {
        // Three IPs already known, cap of 4, two more arrive:
        // 3 + 2 = 5 > 4, so the threshold trips.
        let mut tracker = RebindingState::new();
        tracker.commit("h", &s(&["1.0.0.1", "1.0.0.2", "1.0.0.3"]));
        let policy = policy_with(4, false, vec![]);
        let response = s(&["1.0.0.4", "1.0.0.5"]);

        let outcome = tracker.evaluate("h", &response, &policy);

        assert!(outcome.threshold_exceeded);
        assert_eq!(outcome.novel_ips, ["1.0.0.4", "1.0.0.5"]);
    }

    #[test]
    fn evaluate_threshold_not_exceeded_at_exact_max() {
        // Three IPs already known, cap of 4, one more arrives:
        // 3 + 1 = 4, which is not strictly greater than the cap.
        let mut tracker = RebindingState::new();
        tracker.commit("h", &s(&["1.0.0.1", "1.0.0.2", "1.0.0.3"]));
        let policy = policy_with(4, false, vec![]);
        let response = s(&["1.0.0.4"]);

        let outcome = tracker.evaluate("h", &response, &policy);

        assert_eq!(outcome.threshold_exceeded, false);
        assert_eq!(outcome.novel_ips, ["1.0.0.4"]);
    }

    // ------------------------------------------------------------
    // Allowlist semantics.
    // ------------------------------------------------------------

    #[test]
    fn evaluate_allowlist_violations_when_set() {
        let tracker = RebindingState::new();
        let allowlist = vec!["api.example.com:1.1.1.1", "api.example.com:1.0.0.1"];
        let policy = policy_with(10, false, allowlist);
        let response = s(&["1.1.1.1", "198.51.100.7"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.allowlist_violations, ["198.51.100.7"]);
    }

    #[test]
    fn evaluate_no_allowlist_violations_when_unset() {
        let tracker = RebindingState::new();
        let policy = policy_with(10, false, vec![]);
        let response = s(&["198.51.100.7"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.allowlist_violations.len(), 0);
    }

    // ------------------------------------------------------------
    // Reject-on-rebind filtering.
    // ------------------------------------------------------------

    #[test]
    fn evaluate_reject_on_rebind_filters_novel_above_threshold() {
        // History already sits exactly at the cap of 4.
        let mut tracker = RebindingState::new();
        tracker.commit("h", &s(&["1.0.0.1", "1.0.0.2", "1.0.0.3", "1.0.0.4"]));
        let policy = policy_with(4, true, vec![]);
        // One known IP plus two novel ones; under reject both novel IPs go.
        let response = s(&["1.0.0.4", "1.0.0.5", "1.0.0.6"]);

        let outcome = tracker.evaluate("h", &response, &policy);

        assert!(outcome.threshold_exceeded);
        // Only the previously seen 1.0.0.4 is left.
        assert_eq!(outcome.effective_targets, s(&["1.0.0.4"]));
    }

    #[test]
    fn evaluate_reject_on_rebind_filters_allowlist_violations() {
        let tracker = RebindingState::new();
        let policy = policy_with(10, true, vec!["api.example.com:1.1.1.1"]);
        let response = s(&["1.1.1.1", "198.51.100.7"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.allowlist_violations, ["198.51.100.7"]);
        assert_eq!(outcome.effective_targets, s(&["1.1.1.1"]));
    }

    #[test]
    fn evaluate_audit_only_keeps_violations_in_effective_targets() {
        // With reject=false the violation is reported but nothing is removed.
        let tracker = RebindingState::new();
        let policy = policy_with(10, false, vec!["api.example.com:1.1.1.1"]);
        let response = s(&["1.1.1.1", "198.51.100.7"]);

        let outcome = tracker.evaluate(API_HOST, &response, &policy);

        assert_eq!(outcome.allowlist_violations, ["198.51.100.7"]);
        assert_eq!(outcome.effective_targets, response);
    }

    // ------------------------------------------------------------
    // Commit semantics.
    // ------------------------------------------------------------

    #[test]
    fn commit_persists_observation() {
        let mut tracker = RebindingState::new();
        assert_eq!(tracker.hostname_count(), 0);

        tracker.commit("h", &s(&["1.1.1.1"]));
        assert_eq!(tracker.hostname_count(), 1);
        assert_eq!(tracker.history("h"), s(&["1.1.1.1"]).as_slice());

        // Re-committing a known IP must not duplicate it.
        tracker.commit("h", &s(&["1.1.1.1", "1.0.0.1"]));
        assert_eq!(tracker.history("h"), s(&["1.1.1.1", "1.0.0.1"]).as_slice());
    }

    // ------------------------------------------------------------
    // Allowlist parsing — malformed inputs MUST NOT panic.
    // ------------------------------------------------------------

    #[test]
    fn parse_allowlist_skips_malformed_entries() {
        let entries = s(&[
            "no-colon-here",        // malformed: no separator
            "h:",                   // malformed: empty suffix
            "h:not-an-ip",          // malformed: junk literal
            "h:999.999.999.999",    // malformed: out-of-range octets
            "h:1.1.1.1/notanumber", // malformed: bad CIDR prefix
            "h:1.1.1.1",            // valid
        ]);

        let matchers = parse_allowlist_for_hostname(&entries, "h");

        assert_eq!(matchers.len(), 1, "only the well-formed entry survives");
    }

    #[test]
    fn parse_allowlist_supports_cidr() {
        let entries = s(&["h:203.0.113.0/24"]);

        let matchers = parse_allowlist_for_hostname(&entries, "h");

        assert_eq!(matchers.len(), 1);
        // Inside the /24.
        assert!(ip_in_allowlist("203.0.113.42", &matchers));
        // A neighbouring /24 must be rejected.
        assert!(!ip_in_allowlist("203.0.114.42", &matchers));
    }

    // ------------------------------------------------------------
    // Bonus: hostname filtering of allowlist entries.
    // ------------------------------------------------------------

    #[test]
    fn allowlist_entries_for_other_hostname_are_ignored() {
        // The only entry names a DIFFERENT hostname, so nothing applies to
        // "h" and every IP for it is a violation (fail-closed).
        let tracker = RebindingState::new();
        let policy = policy_with(10, false, vec!["other.example.com:1.1.1.1"]);
        let response = s(&["1.1.1.1"]);

        let outcome = tracker.evaluate("h", &response, &policy);

        assert_eq!(outcome.allowlist_violations, ["1.1.1.1"]);
    }
}