Skip to main content

kanade_shared/wire/
agent_config.rs

1//! Layered fleet configuration that lives in the `agent_config` KV
2//! bucket (Sprint 6).
3//!
4//! Three scopes flow into the agent's effective config, in order of
5//! increasing specificity:
6//!
7//! ```text
8//! built-in default        (compiled in; floor when nothing else is set)
9//!   ↓
10//! agent_config:global     (whole-fleet default)
11//!   ↓
12//! agent_config:groups.<g> (per-group override; one or more apply)
13//!   ↓
14//! agent_config:pcs.<pc>   (per-PC override; final word)
15//! ```
16//!
17//! The wire type for every scope is the same — [`ConfigScope`], a
18//! struct of `Option<T>` fields. `Some` means "this scope sets this
//! field"; `None` means "fall through to the next layer". JSON
//! `null` and an absent field both come back as `None`: serde
//! deserializes `null` into `Option::None`, and the struct-level
//! `default` covers fields that are missing entirely.
22//!
23//! [`resolve`] is the pure functional core that flattens the scope
24//! stack into an [`EffectiveConfig`] (concrete values, no Options).
25//! When the same field is set on more than one group the PC belongs
26//! to, alphabetical group order wins last (CSS-cascade style) and a
27//! [`ResolutionWarning::MultiGroupConflict`] is emitted so the
28//! caller can log it — pre-empts the "why does this PC have value X?
29//! none of my groups say X" debugging session.
30
31use std::collections::BTreeMap;
32use std::time::Duration;
33
34use serde::{Deserialize, Serialize};
35
/// Per-scope partial config. Every field is `Option<T>`: `Some` =
/// set, `None` = inherit from the next-less-specific scope. Serde
/// `default` + `skip_serializing_if` keeps the wire JSON tight —
/// unset fields don't appear in the bucket value.
///
/// The same type is used for the global, per-group and per-PC rows;
/// [`resolve`] layers them into an [`EffectiveConfig`].
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq)]
#[serde(default)]
pub struct ConfigScope {
    /// Rollout target version, if this scope sets one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_version: Option<String>,
    /// Inventory interval as a duration string (built-in default is
    /// `"24h"`); parsed with `humantime` on the effective config.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inventory_interval: Option<String>,
    /// Inventory jitter as a duration string (built-in default is
    /// `"10m"`); parsed with `humantime` on the effective config.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inventory_jitter: Option<String>,
    /// Inventory on/off flag (built-in default is `true`).
    /// NOTE(review): presumably toggles inventory collection on the
    /// agent — confirm against the consumer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inventory_enabled: Option<bool>,
    /// Heartbeat interval as a duration string (built-in default is
    /// `"30s"`); parsed with `humantime` on the effective config.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub heartbeat_interval: Option<String>,
}
54
55impl ConfigScope {
56    pub fn is_empty(&self) -> bool {
57        self.target_version.is_none()
58            && self.inventory_interval.is_none()
59            && self.inventory_jitter.is_none()
60            && self.inventory_enabled.is_none()
61            && self.heartbeat_interval.is_none()
62    }
63}
64
/// Concrete config the agent runs against once the scope stack has
/// been flattened. `target_version` stays `Option` because "no
/// rollout target set anywhere" is a meaningful state (the agent
/// just keeps running the version it has); the other fields always
/// have a value, falling back to [`EffectiveConfig::builtin_defaults`]
/// when no scope sets them.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct EffectiveConfig {
    /// Rollout target version; `None` when no scope sets one.
    pub target_version: Option<String>,
    /// Inventory interval as a duration string; see
    /// [`EffectiveConfig::inventory_interval_duration`] for the parsed form.
    pub inventory_interval: String,
    /// Inventory jitter as a duration string; see
    /// [`EffectiveConfig::inventory_jitter_duration`] for the parsed form.
    pub inventory_jitter: String,
    /// Inventory on/off flag.
    /// NOTE(review): presumably toggles inventory collection on the
    /// agent — confirm against the consumer.
    pub inventory_enabled: bool,
    /// Heartbeat interval as a duration string; see
    /// [`EffectiveConfig::heartbeat_duration`] for the parsed form.
    pub heartbeat_interval: String,
}
79
80impl EffectiveConfig {
81    /// Floor values used when no KV scope sets a given field.
82    /// Mirrors the historic agent.toml defaults so unbootstrapped
83    /// fleets keep behaving the way they did pre-Sprint 6.
84    pub fn builtin_defaults() -> Self {
85        Self {
86            target_version: None,
87            inventory_interval: "24h".to_string(),
88            inventory_jitter: "10m".to_string(),
89            inventory_enabled: true,
90            heartbeat_interval: "30s".to_string(),
91        }
92    }
93
94    /// Parsed `heartbeat_interval`, falling back to the built-in
95    /// 30 s default on a malformed string. Logging the parse error
96    /// is the caller's job (so that test code can stay quiet).
97    pub fn heartbeat_duration(&self) -> Duration {
98        humantime::parse_duration(&self.heartbeat_interval).unwrap_or(Duration::from_secs(30))
99    }
100
101    pub fn inventory_interval_duration(&self) -> Duration {
102        humantime::parse_duration(&self.inventory_interval)
103            .unwrap_or(Duration::from_secs(24 * 60 * 60))
104    }
105
106    pub fn inventory_jitter_duration(&self) -> Duration {
107        humantime::parse_duration(&self.inventory_jitter).unwrap_or(Duration::from_secs(600))
108    }
109}
110
111impl Default for EffectiveConfig {
112    fn default() -> Self {
113        Self::builtin_defaults()
114    }
115}
116
/// Non-fatal observations from [`resolve`] that the caller should
/// log. Currently only "two or more of this PC's groups set the same
/// field" — useful pre-emptive debugging signal when canary / wave /
/// dept overlays accidentally overlap. Note that [`resolve`] does not
/// compare the values: the warning is emitted even when the
/// overlapping groups agree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolutionWarning {
    /// More than one of the PC's groups sets `field`.
    MultiGroupConflict {
        /// Name of the [`ConfigScope`] field that has several setters
        /// (e.g. `"inventory_interval"`).
        field: &'static str,
        /// Group names that set this field, in alphabetical order
        /// (i.e. the application order — the last name in this list
        /// is the one whose value actually won).
        groups: Vec<String>,
    },
}
131
132/// Flatten the scope stack into an [`EffectiveConfig`].
133///
134/// * `global` — the `global` key in the `agent_config` bucket
135///   (`None` if no row yet).
136/// * `group_scopes` — every `groups.<name>` row currently in the
137///   bucket (the caller can pass all of them; only the ones whose
138///   name is in `my_groups` are applied).
139/// * `pc_scope` — the `pcs.<pc_id>` row for this agent (`None` if
140///   no row yet).
141/// * `my_groups` — this agent's current memberships (from the
142///   `agent_groups` bucket).
143///
144/// Order of application: built-in default → global → per-group
145/// (alphabetical, last wins) → per-pc. Multi-group conflicts (≥ 2
146/// of `my_groups` setting the same field) are returned as warnings
147/// alongside the resolved config.
148pub fn resolve(
149    global: Option<&ConfigScope>,
150    group_scopes: &BTreeMap<String, ConfigScope>,
151    pc_scope: Option<&ConfigScope>,
152    my_groups: &[String],
153) -> (EffectiveConfig, Vec<ResolutionWarning>) {
154    let mut out = EffectiveConfig::builtin_defaults();
155    let mut warnings = Vec::new();
156
157    if let Some(g) = global {
158        apply_scope(&mut out, g);
159    }
160
161    // Sort + dedup the group list so iteration order is deterministic
162    // and "last wins" is well-defined.
163    let mut sorted_groups: Vec<&str> = my_groups.iter().map(String::as_str).collect();
164    sorted_groups.sort();
165    sorted_groups.dedup();
166
167    // Pass 1: find multi-setter fields so the caller can warn before
168    // pass 2 silently lets the alphabetical-last value win.
169    let mut setters: BTreeMap<&'static str, Vec<String>> = BTreeMap::new();
170    for g in &sorted_groups {
171        let Some(scope) = group_scopes.get(*g) else {
172            continue;
173        };
174        if scope.target_version.is_some() {
175            setters
176                .entry("target_version")
177                .or_default()
178                .push(g.to_string());
179        }
180        if scope.inventory_interval.is_some() {
181            setters
182                .entry("inventory_interval")
183                .or_default()
184                .push(g.to_string());
185        }
186        if scope.inventory_jitter.is_some() {
187            setters
188                .entry("inventory_jitter")
189                .or_default()
190                .push(g.to_string());
191        }
192        if scope.inventory_enabled.is_some() {
193            setters
194                .entry("inventory_enabled")
195                .or_default()
196                .push(g.to_string());
197        }
198        if scope.heartbeat_interval.is_some() {
199            setters
200                .entry("heartbeat_interval")
201                .or_default()
202                .push(g.to_string());
203        }
204    }
205    for (field, groups) in setters {
206        if groups.len() > 1 {
207            warnings.push(ResolutionWarning::MultiGroupConflict { field, groups });
208        }
209    }
210
211    // Pass 2: actually apply, alphabetically. Last-wins by construction.
212    for g in &sorted_groups {
213        if let Some(scope) = group_scopes.get(*g) {
214            apply_scope(&mut out, scope);
215        }
216    }
217
218    if let Some(p) = pc_scope {
219        apply_scope(&mut out, p);
220    }
221
222    (out, warnings)
223}
224
225fn apply_scope(out: &mut EffectiveConfig, s: &ConfigScope) {
226    if let Some(v) = &s.target_version {
227        out.target_version = Some(v.clone());
228    }
229    if let Some(v) = &s.inventory_interval {
230        out.inventory_interval = v.clone();
231    }
232    if let Some(v) = &s.inventory_jitter {
233        out.inventory_jitter = v.clone();
234    }
235    if let Some(v) = s.inventory_enabled {
236        out.inventory_enabled = v;
237    }
238    if let Some(v) = &s.heartbeat_interval {
239        out.heartbeat_interval = v.clone();
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// A scope that sets `inventory_interval` and nothing else.
    fn interval_only(value: &str) -> ConfigScope {
        ConfigScope {
            inventory_interval: Some(value.to_owned()),
            ..ConfigScope::default()
        }
    }

    /// Owned membership list built from string literals.
    fn members(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    /// Stand-in for a bucket with no `groups.<name>` rows.
    fn no_groups() -> BTreeMap<String, ConfigScope> {
        BTreeMap::new()
    }

    #[test]
    fn empty_stack_gives_builtin_defaults() {
        let (effective, warnings) = resolve(None, &no_groups(), None, &[]);
        assert_eq!(effective, EffectiveConfig::builtin_defaults());
        assert!(warnings.is_empty());
    }

    #[test]
    fn global_only() {
        let global = ConfigScope {
            heartbeat_interval: Some("60s".to_owned()),
            ..interval_only("12h")
        };
        let (effective, _) = resolve(Some(&global), &no_groups(), None, &[]);
        assert_eq!(effective.inventory_interval, "12h");
        assert_eq!(effective.heartbeat_interval, "60s");
        // Fields the global row leaves alone keep the built-in values.
        assert_eq!(effective.inventory_jitter, "10m");
        assert!(effective.inventory_enabled);
        assert!(effective.target_version.is_none());
    }

    #[test]
    fn group_overrides_global() {
        let global = interval_only("24h");
        let groups = BTreeMap::from([("canary".to_owned(), interval_only("1h"))]);
        let (effective, warnings) =
            resolve(Some(&global), &groups, None, &members(&["canary"]));
        assert_eq!(effective.inventory_interval, "1h");
        assert!(warnings.is_empty());
    }

    #[test]
    fn pc_overrides_group() {
        let groups = BTreeMap::from([("wave1".to_owned(), interval_only("12h"))]);
        let pc = interval_only("5m");
        let (effective, _) = resolve(None, &groups, Some(&pc), &members(&["wave1"]));
        assert_eq!(effective.inventory_interval, "5m");
    }

    #[test]
    fn pc_overrides_global_when_no_group_match() {
        let global = interval_only("24h");
        let pc = interval_only("30m");
        let (effective, _) = resolve(Some(&global), &no_groups(), Some(&pc), &[]);
        assert_eq!(effective.inventory_interval, "30m");
    }

    #[test]
    fn partial_override_only_changes_named_fields() {
        let global = ConfigScope {
            heartbeat_interval: Some("30s".to_owned()),
            ..interval_only("24h")
        };
        // The PC row deliberately leaves inventory_interval unset.
        let pc = ConfigScope {
            heartbeat_interval: Some("15s".to_owned()),
            ..ConfigScope::default()
        };
        let (effective, _) = resolve(Some(&global), &no_groups(), Some(&pc), &[]);
        assert_eq!(effective.inventory_interval, "24h"); // inherited from global
        assert_eq!(effective.heartbeat_interval, "15s"); // overridden by pc
    }

    #[test]
    fn multi_group_conflict_emits_warning() {
        let groups = BTreeMap::from([
            ("wave1".to_owned(), interval_only("12h")),
            ("dept-eng".to_owned(), interval_only("24h")),
        ]);
        let (effective, warnings) =
            resolve(None, &groups, None, &members(&["wave1", "dept-eng"]));
        // "dept-eng" sorts ahead of "wave1", so wave1 is applied last and wins.
        assert_eq!(effective.inventory_interval, "12h");
        assert_eq!(
            warnings,
            vec![ResolutionWarning::MultiGroupConflict {
                field: "inventory_interval",
                groups: vec!["dept-eng".to_string(), "wave1".to_string()],
            }]
        );
    }

    #[test]
    fn group_alphabetical_last_wins_no_conflict_when_only_one_sets() {
        let groups = BTreeMap::from([
            ("wave1".to_owned(), interval_only("12h")),
            (
                "dept-eng".to_owned(),
                // Sets a different field, so there is nothing to conflict on.
                ConfigScope {
                    heartbeat_interval: Some("15s".to_owned()),
                    ..ConfigScope::default()
                },
            ),
        ]);
        let (effective, warnings) =
            resolve(None, &groups, None, &members(&["wave1", "dept-eng"]));
        assert_eq!(effective.inventory_interval, "12h");
        assert_eq!(effective.heartbeat_interval, "15s");
        assert!(warnings.is_empty());
    }

    #[test]
    fn unknown_group_is_silently_ignored() {
        // The membership list mentions a group with no scope row yet —
        // typical for the first agent joining a freshly named group.
        // The resolver must treat that as a no-op rather than an error.
        let groups = BTreeMap::from([("canary".to_owned(), interval_only("1h"))]);
        let (effective, warnings) =
            resolve(None, &groups, None, &members(&["canary", "ghost-group"]));
        assert_eq!(effective.inventory_interval, "1h");
        assert!(warnings.is_empty());
    }

    #[test]
    fn group_scope_not_applied_when_pc_not_in_group() {
        let groups = BTreeMap::from([(
            "canary".to_owned(),
            ConfigScope {
                target_version: Some("0.3.0".to_owned()),
                ..ConfigScope::default()
            },
        )]);
        let (effective, _) = resolve(None, &groups, None, &members(&["dept-eng"]));
        // The PC is not a canary member, so the rollout target must not leak in.
        assert!(effective.target_version.is_none());
    }

    #[test]
    fn duplicate_group_names_dedup_silently() {
        let groups = BTreeMap::from([("wave1".to_owned(), interval_only("12h"))]);
        // The same name listed twice must not look like a group
        // conflicting with itself.
        let (effective, warnings) =
            resolve(None, &groups, None, &members(&["wave1", "wave1"]));
        assert_eq!(effective.inventory_interval, "12h");
        assert!(warnings.is_empty());
    }

    #[test]
    fn config_scope_serde_round_trip() {
        let original = ConfigScope {
            target_version: Some("0.3.0".to_owned()),
            heartbeat_interval: Some("15s".to_owned()),
            ..ConfigScope::default()
        };
        let json = serde_json::to_string(&original).unwrap();
        // Unset fields are omitted from the JSON entirely.
        assert_eq!(
            json,
            r#"{"target_version":"0.3.0","heartbeat_interval":"15s"}"#
        );
        let decoded: ConfigScope = serde_json::from_str(&json).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn empty_config_scope_round_trips_as_empty_json() {
        let original = ConfigScope::default();
        assert!(original.is_empty());
        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, "{}");
        let decoded: ConfigScope = serde_json::from_str(&json).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn deserialize_tolerates_unknown_fields_for_forward_compat() {
        // Later sprints may add fields (log_level, jitter strategy, …);
        // older agent / backend builds must still parse such rows.
        let json = r#"{"target_version":"0.3.0","future_knob":"future_value"}"#;
        let decoded: ConfigScope = serde_json::from_str(json).unwrap();
        assert_eq!(decoded.target_version.as_deref(), Some("0.3.0"));
    }

    #[test]
    fn pc_does_not_override_other_pcs() {
        // Sanity check: the pc_scope argument is by definition the row
        // for THIS pc, and choosing the right row is the caller's job.
        // What is guarded here is ordering — the PC layer is applied
        // after the groups, so its value is the one left visible even
        // if a future refactor reshuffles the wiring.
        let groups = BTreeMap::from([("wave1".to_owned(), interval_only("12h"))]);
        let pc = interval_only("5m");
        let (effective, _) = resolve(None, &groups, Some(&pc), &members(&["wave1"]));
        assert_eq!(effective.inventory_interval, "5m");
    }
}