net/adapter/net/dataforts/greedy/config.rs
1//! `GreedyConfig` — per-node tuning surface for the greedy-LRU
2//! dataforts subsystem. Locked defaults match
3//! `docs/misc/DATAFORTS_PLAN.md` § Phase 1.
4
5use std::time::Duration;
6
7use crate::adapter::net::behavior::placement::{ColocationPolicy, IntentMatchPolicy, ScopeLabel};
8
/// One mebibyte in bytes. Private unit helper for the byte caps below.
const MIB: u64 = 1 << 20;

/// One gibibyte in bytes. Private unit helper for the byte caps below.
const GIB: u64 = 1 << 30;

/// Per-channel cache cap applied when nothing overrides it: 100 MiB.
/// Big enough for typical chain working sets, yet small enough that
/// the 10 GiB total budget holds roughly 100 distinct channels
/// before eviction starts.
pub const DEFAULT_PER_CHANNEL_CAP_BYTES: u64 = 100 * MIB;

/// Smallest accepted `per_channel_cap_bytes`: 1 MiB. Channels below
/// this size thrash on the per-event append path, so such a config is
/// rejected at construction instead of leaving the runtime to fight
/// the LRU.
pub const MIN_PER_CHANNEL_CAP_BYTES: u64 = MIB;

/// Total cache cap summed over every channel when nothing overrides
/// it: 10 GiB. Chosen to fit comfortably on a small-disk edge node
/// while still being large enough to materially absorb working-set
/// reads at gigabit-class link rates.
pub const DEFAULT_TOTAL_CAP_BYTES: u64 = 10 * GIB;

/// Proximity bound, in milliseconds, applied when nothing overrides
/// it. Chains whose home is more than 200 ms from the local node are
/// not admitted, on the theory that the catch-up bandwidth they would
/// need is not worth the cache cost.
pub const DEFAULT_PROXIMITY_MAX_RTT_MS: u64 = 200;

/// I/O budget, as a fraction of the NIC peak, applied when nothing
/// overrides it. `0.25` keeps three-quarters of the link free for
/// foreground publish traffic.
pub const DEFAULT_BANDWIDTH_BUDGET_FRACTION: f32 = 0.25;

/// NIC peak, in bytes per second, that the bandwidth budget is
/// computed against when no override is supplied: 1 Gbps, i.e.
/// 10^9 bits/s divided by 8 bits per byte. A measured probe is
/// intentionally still deferred (see `DATAFORTS_PLAN.md` § Phase 1);
/// operators on faster NICs should set `nic_peak_bytes_per_s`
/// explicitly to avoid proportional under-utilization.
pub const DEFAULT_NIC_PEAK_BYTES_PER_S: u64 = 1_000_000_000 / 8;

/// Ceiling on concurrently in-flight `observe_event` tasks fanned out
/// by the mesh inbound dispatch hook, applied when nothing overrides
/// it. Beyond this many spawned tasks the observer drops events and
/// increments
/// `dataforts_greedy_observer_dropped_total{reason="overloaded"}`
/// instead of spawning without bound. Sized to absorb bursty publish
/// traffic on a typical edge node while capping the worst-case memory
/// footprint at a few MiB of `Bytes` clones.
pub const DEFAULT_OBSERVER_INFLIGHT_CAP: usize = 1024;
51
52/// Per-node configuration for [`crate::adapter::net::dataforts::greedy`].
53///
54/// Validation rules (enforced by [`Self::validate`]):
55///
56/// - `per_channel_cap_bytes >= MIN_PER_CHANNEL_CAP_BYTES`
57/// - `total_cap_bytes >= per_channel_cap_bytes`
58/// - `bandwidth_budget_fraction` is finite, `> 0.0`, `<= 1.0`
59/// - `proximity_max_rtt` is non-zero
60///
61/// `scopes` may be empty — an empty scope set admits chains
62/// regardless of `scope:` tags (greedy with no scope filter). To
63/// reject all scope-tagged chains, leave scopes empty and configure
64/// `intent_match: IntentMatchPolicy::Strict` so admission still
65/// gates on intent.
66#[derive(Debug, Clone)]
67pub struct GreedyConfig {
68 /// Local node's interesting scopes — chains whose `scope:` tag
69 /// matches any of these are eligible for admission.
70 pub scopes: Vec<ScopeLabel>,
71 /// Maximum acceptable RTT to the chain's home node before
72 /// admission rejects (proximity gate).
73 pub proximity_max_rtt: Duration,
74 /// Per-channel byte cap on the cache substrate. Reuses
75 /// `RedexFileConfig::with_retention_max_bytes` once the cache
76 /// runtime lands.
77 pub per_channel_cap_bytes: u64,
78 /// Total byte cap across every channel the greedy runtime is
79 /// holding. LRU eviction drives toward this bound.
80 pub total_cap_bytes: u64,
81 /// I/O budget for greedy cache writes, expressed as a fraction
82 /// of the measured NIC peak. Backpressures cache writes when
83 /// the budget is exhausted so application traffic isn't
84 /// crowded out.
85 pub bandwidth_budget_fraction: f32,
86 /// Override for the NIC peak (bytes/sec) the bandwidth budget
87 /// computes against. `None` falls back to
88 /// [`DEFAULT_NIC_PEAK_BYTES_PER_S`] (1 Gbps). Set this on
89 /// deployments with > 1 Gbps NICs — otherwise greedy throttles
90 /// at gigabit-class rates and the operator sees what looks
91 /// like an admission-reject storm in
92 /// `dataforts_greedy_admit_rejected_total{reason="bandwidth"}`.
93 pub nic_peak_bytes_per_s: Option<u64>,
94 /// Intent-axis admission policy. Reuses the substrate's
95 /// `IntentMatchPolicy` so greedy uses the same eligibility
96 /// shape as `StandardPlacement`.
97 pub intent_match: IntentMatchPolicy,
98 /// Colocation-axis admission policy. Soft preference by
99 /// default — colocation tilts admission toward affinity but
100 /// doesn't override capacity constraints.
101 pub colocation_policy: ColocationPolicy,
102 /// Maximum in-flight `observe_event` tasks. Sized to bound
103 /// the worst-case memory footprint of bursty inbound publish
104 /// traffic; observed events past this cap drop with a
105 /// metrics increment. See [`DEFAULT_OBSERVER_INFLIGHT_CAP`].
106 pub observer_inflight_cap: usize,
107}
108
109impl Default for GreedyConfig {
110 fn default() -> Self {
111 Self {
112 scopes: Vec::new(),
113 proximity_max_rtt: Duration::from_millis(DEFAULT_PROXIMITY_MAX_RTT_MS),
114 per_channel_cap_bytes: DEFAULT_PER_CHANNEL_CAP_BYTES,
115 total_cap_bytes: DEFAULT_TOTAL_CAP_BYTES,
116 bandwidth_budget_fraction: DEFAULT_BANDWIDTH_BUDGET_FRACTION,
117 nic_peak_bytes_per_s: None,
118 intent_match: IntentMatchPolicy::AnyOfLocalCapabilities,
119 colocation_policy: ColocationPolicy::SoftPreference,
120 observer_inflight_cap: DEFAULT_OBSERVER_INFLIGHT_CAP,
121 }
122 }
123}
124
125impl GreedyConfig {
126 /// Construct a config with the locked defaults from
127 /// `DATAFORTS_PLAN.md` § Phase 1.
128 pub fn new() -> Self {
129 Self::default()
130 }
131
132 /// Builder: replace the scope set.
133 pub fn with_scopes(mut self, scopes: Vec<ScopeLabel>) -> Self {
134 self.scopes = scopes;
135 self
136 }
137
138 /// Builder: set the proximity bound.
139 pub fn with_proximity_max_rtt(mut self, rtt: Duration) -> Self {
140 self.proximity_max_rtt = rtt;
141 self
142 }
143
144 /// Builder: set the per-channel cap.
145 pub fn with_per_channel_cap_bytes(mut self, cap: u64) -> Self {
146 self.per_channel_cap_bytes = cap;
147 self
148 }
149
150 /// Builder: set the total cap.
151 pub fn with_total_cap_bytes(mut self, cap: u64) -> Self {
152 self.total_cap_bytes = cap;
153 self
154 }
155
156 /// Builder: set the bandwidth budget fraction.
157 pub fn with_bandwidth_budget_fraction(mut self, fraction: f32) -> Self {
158 self.bandwidth_budget_fraction = fraction;
159 self
160 }
161
162 /// Builder: override the NIC peak (bytes/sec). `None` reverts
163 /// to the [`DEFAULT_NIC_PEAK_BYTES_PER_S`] fallback.
164 pub fn with_nic_peak_bytes_per_s(mut self, peak: Option<u64>) -> Self {
165 self.nic_peak_bytes_per_s = peak;
166 self
167 }
168
169 /// Builder: set the observer in-flight task cap. Hard floor
170 /// of 1 — a zero cap would mean "drop every event" and is
171 /// almost certainly a config mistake; clamp on the way in.
172 pub fn with_observer_inflight_cap(mut self, cap: usize) -> Self {
173 self.observer_inflight_cap = cap.max(1);
174 self
175 }
176
177 /// The effective NIC peak after applying the override-or-default
178 /// rule. Saturates to [`DEFAULT_NIC_PEAK_BYTES_PER_S`] when
179 /// `nic_peak_bytes_per_s` is `None` or `Some(0)`.
180 pub fn effective_nic_peak_bytes_per_s(&self) -> u64 {
181 match self.nic_peak_bytes_per_s {
182 Some(v) if v > 0 => v,
183 _ => DEFAULT_NIC_PEAK_BYTES_PER_S,
184 }
185 }
186
187 /// Builder: set the intent-match policy.
188 pub fn with_intent_match(mut self, policy: IntentMatchPolicy) -> Self {
189 self.intent_match = policy;
190 self
191 }
192
193 /// Builder: set the colocation policy.
194 pub fn with_colocation_policy(mut self, policy: ColocationPolicy) -> Self {
195 self.colocation_policy = policy;
196 self
197 }
198
199 /// Validate the locked invariants. Returns a typed error
200 /// naming the offending field so binding-layer callers can
201 /// surface operator-friendly diagnostics.
202 pub fn validate(&self) -> Result<(), GreedyConfigError> {
203 if self.per_channel_cap_bytes < MIN_PER_CHANNEL_CAP_BYTES {
204 return Err(GreedyConfigError::PerChannelCapTooLow {
205 got: self.per_channel_cap_bytes,
206 min: MIN_PER_CHANNEL_CAP_BYTES,
207 });
208 }
209 if self.total_cap_bytes < self.per_channel_cap_bytes {
210 return Err(GreedyConfigError::TotalCapBelowPerChannel {
211 total: self.total_cap_bytes,
212 per_channel: self.per_channel_cap_bytes,
213 });
214 }
215 if !self.bandwidth_budget_fraction.is_finite()
216 || self.bandwidth_budget_fraction <= 0.0
217 || self.bandwidth_budget_fraction > 1.0
218 {
219 return Err(GreedyConfigError::BudgetFractionOutOfRange {
220 got: self.bandwidth_budget_fraction,
221 });
222 }
223 if self.proximity_max_rtt.is_zero() {
224 return Err(GreedyConfigError::ProximityRttZero);
225 }
226 Ok(())
227 }
228}
229
230/// Typed validation errors. Distinct variants per invariant so
231/// the binding layer can route to language-idiomatic error
232/// classes without parsing strings.
233// `Eq` intentionally omitted — `BudgetFractionOutOfRange` carries
234// an `f32`, which has NaN asymmetry. `PartialEq` is sufficient for
235// the typical "compare against an expected error" pattern in tests.
236#[derive(Debug, thiserror::Error, PartialEq)]
237pub enum GreedyConfigError {
238 /// `per_channel_cap_bytes` is below the floor.
239 #[error("greedy per_channel_cap_bytes {got} below minimum {min}")]
240 PerChannelCapTooLow {
241 /// Configured value.
242 got: u64,
243 /// Minimum permitted value.
244 min: u64,
245 },
246 /// `total_cap_bytes < per_channel_cap_bytes`. A total budget
247 /// smaller than a single channel's cap can't admit any
248 /// channel.
249 #[error("greedy total_cap_bytes {total} must be ≥ per_channel_cap_bytes {per_channel}")]
250 TotalCapBelowPerChannel {
251 /// Configured total.
252 total: u64,
253 /// Configured per-channel cap.
254 per_channel: u64,
255 },
256 /// `bandwidth_budget_fraction` outside `(0.0, 1.0]` or
257 /// non-finite (NaN / ±inf).
258 #[error("greedy bandwidth_budget_fraction {got} outside (0.0, 1.0] or non-finite")]
259 BudgetFractionOutOfRange {
260 /// Configured value.
261 got: f32,
262 },
263 /// `proximity_max_rtt` is zero. A zero RTT bound excludes every
264 /// non-local peer and produces a single-node cache — almost
265 /// certainly a misconfig.
266 #[error("greedy proximity_max_rtt must be non-zero")]
267 ProximityRttZero,
268}
269
#[cfg(test)]
mod tests {
    //! Unit tests for config validation, the clamping builder, the
    //! NIC-peak fallback, and the error messages.

    use super::*;

    #[test]
    fn default_is_valid() {
        GreedyConfig::default()
            .validate()
            .expect("defaults must validate");
    }

    #[test]
    fn per_channel_cap_below_floor_rejected() {
        let cfg = GreedyConfig::default().with_per_channel_cap_bytes(1024);
        assert_eq!(
            cfg.validate(),
            Err(GreedyConfigError::PerChannelCapTooLow {
                got: 1024,
                min: MIN_PER_CHANNEL_CAP_BYTES,
            })
        );
    }

    #[test]
    fn total_cap_below_per_channel_rejected() {
        let cfg = GreedyConfig::default()
            .with_per_channel_cap_bytes(200 * 1024 * 1024)
            .with_total_cap_bytes(100 * 1024 * 1024);
        assert_eq!(
            cfg.validate(),
            Err(GreedyConfigError::TotalCapBelowPerChannel {
                total: 100 * 1024 * 1024,
                per_channel: 200 * 1024 * 1024,
            })
        );
    }

    #[test]
    fn first_violated_invariant_is_reported() {
        // Both the floor and the total-vs-per-channel rule are broken;
        // the floor check runs first, so that is the error returned.
        let cfg = GreedyConfig::default()
            .with_per_channel_cap_bytes(1024)
            .with_total_cap_bytes(512);
        let err = cfg.validate().expect_err("config must reject");
        assert!(matches!(
            err,
            GreedyConfigError::PerChannelCapTooLow { got: 1024, .. }
        ));
    }

    #[test]
    fn budget_fraction_zero_rejected() {
        let cfg = GreedyConfig::default().with_bandwidth_budget_fraction(0.0);
        assert_eq!(
            cfg.validate(),
            Err(GreedyConfigError::BudgetFractionOutOfRange { got: 0.0 })
        );
    }

    #[test]
    fn budget_fraction_negative_rejected() {
        let cfg = GreedyConfig::default().with_bandwidth_budget_fraction(-0.1);
        let err = cfg.validate().expect_err("negative fraction must reject");
        assert!(matches!(
            err,
            GreedyConfigError::BudgetFractionOutOfRange { .. }
        ));
    }

    #[test]
    fn budget_fraction_above_one_rejected() {
        let cfg = GreedyConfig::default().with_bandwidth_budget_fraction(1.5);
        assert_eq!(
            cfg.validate(),
            Err(GreedyConfigError::BudgetFractionOutOfRange { got: 1.5 })
        );
    }

    #[test]
    fn budget_fraction_nan_rejected() {
        let cfg = GreedyConfig::default().with_bandwidth_budget_fraction(f32::NAN);
        let err = cfg.validate().expect_err("NaN fraction must reject");
        // NaN never compares equal, so match on the payload instead
        // of using `assert_eq!`.
        assert!(matches!(
            err,
            GreedyConfigError::BudgetFractionOutOfRange { got } if got.is_nan()
        ));
    }

    #[test]
    fn budget_fraction_inf_rejected() {
        let cfg = GreedyConfig::default().with_bandwidth_budget_fraction(f32::INFINITY);
        let err = cfg.validate().expect_err("inf fraction must reject");
        assert!(matches!(
            err,
            GreedyConfigError::BudgetFractionOutOfRange { .. }
        ));
    }

    #[test]
    fn proximity_rtt_zero_rejected() {
        let cfg = GreedyConfig::default().with_proximity_max_rtt(Duration::ZERO);
        assert_eq!(cfg.validate(), Err(GreedyConfigError::ProximityRttZero));
    }

    #[test]
    fn boundary_values_admitted() {
        // Floor values for each axis — should all validate.
        let cfg = GreedyConfig::default()
            .with_per_channel_cap_bytes(MIN_PER_CHANNEL_CAP_BYTES)
            .with_total_cap_bytes(MIN_PER_CHANNEL_CAP_BYTES)
            .with_bandwidth_budget_fraction(1.0)
            .with_proximity_max_rtt(Duration::from_nanos(1));
        cfg.validate().expect("boundary values are admissible");
    }

    #[test]
    fn observer_inflight_cap_zero_is_clamped_to_one() {
        let cfg = GreedyConfig::default().with_observer_inflight_cap(0);
        assert_eq!(cfg.observer_inflight_cap, 1);
    }

    #[test]
    fn observer_inflight_cap_nonzero_is_preserved() {
        let cfg = GreedyConfig::default().with_observer_inflight_cap(7);
        assert_eq!(cfg.observer_inflight_cap, 7);
        assert_eq!(
            GreedyConfig::default().observer_inflight_cap,
            DEFAULT_OBSERVER_INFLIGHT_CAP
        );
    }

    #[test]
    fn effective_nic_peak_falls_back_to_default() {
        let unset = GreedyConfig::default();
        assert_eq!(
            unset.effective_nic_peak_bytes_per_s(),
            DEFAULT_NIC_PEAK_BYTES_PER_S
        );

        // A zero override is treated like no override at all.
        let zero = GreedyConfig::default().with_nic_peak_bytes_per_s(Some(0));
        assert_eq!(
            zero.effective_nic_peak_bytes_per_s(),
            DEFAULT_NIC_PEAK_BYTES_PER_S
        );
    }

    #[test]
    fn effective_nic_peak_honors_override() {
        let cfg = GreedyConfig::default().with_nic_peak_bytes_per_s(Some(1_250_000_000));
        assert_eq!(cfg.effective_nic_peak_bytes_per_s(), 1_250_000_000);
    }

    #[test]
    fn error_messages_name_the_offending_values() {
        let too_low = GreedyConfigError::PerChannelCapTooLow {
            got: 1024,
            min: MIN_PER_CHANNEL_CAP_BYTES,
        };
        assert_eq!(
            too_low.to_string(),
            "greedy per_channel_cap_bytes 1024 below minimum 1048576"
        );
        assert_eq!(
            GreedyConfigError::ProximityRttZero.to_string(),
            "greedy proximity_max_rtt must be non-zero"
        );
    }
}