Skip to main content

ai_memory/federation/identity/
inventory.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! Declarative federation inventory — the GitOps source of truth for
5//! fleet membership, trust topology, and enforcement posture.
6//!
7//! Historically federation trust lived in on-disk `.pub` keyfiles placed
8//! by hand: no reviewable, version-controlled description of *who is in
9//! the fleet*. This module parses a declarative YAML inventory (a file
10//! reviewed in a repo) into a validated [`FederationInventory`] that the
11//! P3c reconciler diffs against observed live state.
12//!
13//! ## Why strict parsing
14//!
15//! Unlike an MCP tool-request struct (which stays permissive per #1052 so
16//! the wire schema is truthful for heterogeneous hosts), this is an
17//! operator-authored *trust* config. A silently-dropped typo —
18//! `requir_sig:` parsing as the default `require_sig: false` — would
19//! quietly weaken enforcement. So every struct here carries
20//! `#[serde(deny_unknown_fields)]`: an unrecognised key is a hard parse
21//! error the operator sees at load time, mirroring the inline-API-key
22//! rejection in [`crate::config`]. (The #1052 honesty pin only scans the
23//! MCP `tool_definitions()` payload, so this strictness is out of its
24//! scope.)
25//!
26//! ## Reuse, not reinvention
27//!
28//! Node ids are validated with [`crate::validate::validate_agent_id_shape`]
29//! (the SPIFFE-tolerant, path-traversal-guarded shape check every identity
30//! string already passes), and durations with
31//! [`crate::config::parse_duration_string`] (the same `<int><unit>` form
32//! operators type into `--since` flags). No bespoke regex, no second
33//! duration grammar.
34
35use std::collections::BTreeSet;
36use std::path::Path;
37
38use serde::Deserialize;
39
/// Environment variable naming the inventory file; read by
/// [`FederationInventory::load_from_env`]. Part of the
/// `AI_MEMORY_FED_*` family (cf. [`super::resolver::FED_IDENTITY_ENV`]).
pub const FED_INVENTORY_PATH_ENV: &str = "AI_MEMORY_FED_INVENTORY_PATH";

/// Smallest sensible quorum width. A width of zero would mean "no peer
/// acknowledgement required", collapsing the W=N durability guarantee.
/// [`FederationInventory::validate`] rejects any `quorum.width` below
/// this value with [`InventoryError::InvalidQuorumWidth`].
pub const MIN_QUORUM_WIDTH: u32 = 1;
47
/// How a node proves its identity to the issuer before a credential is
/// minted. Kebab-case on the wire so the YAML reads `attestor: mtls-cert`.
///
/// The variant names are renamed by `rename_all = "kebab-case"`, so the
/// accepted YAML spellings are `mtls-cert` and `node-plugin`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub enum AttestorMethod {
    /// The node presents its existing mTLS client certificate; the
    /// issuer maps the verified CN/SAN to the requested agent-id
    /// (ADR-001 Decision 4 — the Phase-2 default, no new secret-handling).
    /// Written `mtls-cert` in the inventory.
    MtlsCert,
    /// A pluggable node-identity attestor (cloud instance identity, k8s
    /// service account, TPM). Trait seam only at v0.7.0; declaring it in
    /// an inventory is accepted but the backend is a later phase.
    /// Written `node-plugin` in the inventory.
    NodePlugin,
}
62
/// One node's desired membership facts.
///
/// `id`, `attestor`, `cred_ttl` and `renew_before` carry no serde default
/// and must therefore be present in the YAML; only `roles` may be omitted.
/// The two duration fields are stored as the raw operator-typed strings
/// and parsed on demand by [`NodeSpec::cred_ttl_duration`] and
/// [`NodeSpec::renew_before_duration`].
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct NodeSpec {
    /// SPIFFE-style agent id this node signs and presents as. Validated
    /// with [`crate::validate::validate_agent_id_shape`], and required to
    /// be unique across the whole inventory.
    pub id: String,
    /// How the node attests its identity to the issuer.
    pub attestor: AttestorMethod,
    /// Credential lifetime, `<int><unit>` (e.g. `1h`, `30m`). Parsed with
    /// [`crate::config::parse_duration_string`]; must be strictly positive.
    pub cred_ttl: String,
    /// Lead window before expiry within which the node renews, same form
    /// as `cred_ttl`. Must be strictly positive and strictly shorter than
    /// `cred_ttl`.
    pub renew_before: String,
    /// Optional free-form roles (e.g. `writer`, `reader`). Not interpreted
    /// here; carried for the reconciler / future RBAC. Defaults to an
    /// empty list when the key is omitted.
    #[serde(default)]
    pub roles: Vec<String>,
}
83
/// One region: an intermediate-CA scope containing nodes.
///
/// Only `name` is required in the YAML; the other fields fall back to
/// their serde defaults (`None` / empty list).
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct RegionSpec {
    /// Region name (e.g. `nyc`). Non-empty: a blank or whitespace-only
    /// name is rejected by [`FederationInventory::validate`].
    pub name: String,
    /// Reference to this region's intermediate CA (hierarchical trust,
    /// Phase 4). Optional at v0.7.0 — a single-tier fleet roots every
    /// node at the trust-domain root.
    #[serde(default)]
    pub intermediate_ca: Option<String>,
    /// Member nodes. May be empty or omitted.
    #[serde(default)]
    pub nodes: Vec<NodeSpec>,
}
99
/// Quorum width — the `W` in W-of-N federated writes.
///
/// The `width` key has no serde default, so a `quorum:` block must spell
/// it out explicitly.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct QuorumSpec {
    /// Number of acknowledgements a write must collect. `>= MIN_QUORUM_WIDTH`;
    /// smaller values are rejected by [`FederationInventory::validate`].
    pub width: u32,
}
107
/// Fleet enforcement posture.
///
/// Derives `Default`, which is what [`FederationInventory`] falls back to
/// when the whole `enforcement:` block is omitted.
#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct EnforcementSpec {
    /// Whether receivers reject unsigned posts. Maps to
    /// `AI_MEMORY_FED_REQUIRE_SIG`. Defaults to `false` so an inventory
    /// that omits the block keeps the permissive rollout posture.
    #[serde(default)]
    pub require_sig: bool,
}
118
/// The declarative inventory: desired fleet membership + trust topology +
/// enforcement, as parsed from YAML and semantically validated.
///
/// `trust_domain` and `quorum` are required keys; every other field has a
/// serde default. Deserialising alone only checks shape — use
/// [`FederationInventory::from_yaml_str`] (or the loaders built on it) to
/// also run [`FederationInventory::validate`].
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FederationInventory {
    /// The trust-domain name every credential is scoped to. Non-empty
    /// (whitespace-only counts as empty).
    pub trust_domain: String,
    /// Reference to the trust-domain root CA. Optional — a brand-new
    /// inventory describing membership before the CA exists still parses.
    #[serde(default)]
    pub root_ca: Option<String>,
    /// Regional groupings of nodes. May be empty or omitted.
    #[serde(default)]
    pub regions: Vec<RegionSpec>,
    /// Quorum width.
    pub quorum: QuorumSpec,
    /// Enforcement posture (defaults to permissive when omitted).
    #[serde(default)]
    pub enforcement: EnforcementSpec,
}
139
/// Reasons an inventory fails to load or validate.
///
/// Every variant has a stable machine-readable [`tag`](Self::tag). The
/// [`Display`](std::fmt::Display) rendering is that tag, followed — for
/// variants that carry data — by the offending values in parentheses.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InventoryError {
    /// The YAML could not be parsed (syntax error or unknown field).
    Parse(String),
    /// The file could not be read.
    Io(String),
    /// `trust_domain` is empty / whitespace.
    EmptyTrustDomain,
    /// A region carried an empty `name`.
    EmptyRegionName,
    /// A node id failed the agent-id shape check.
    InvalidNodeId { id: String, reason: String },
    /// A `cred_ttl` / `renew_before` value did not parse to a positive
    /// duration.
    InvalidDuration {
        node: String,
        field: &'static str,
        value: String,
    },
    /// `renew_before` was not strictly shorter than `cred_ttl` — the node
    /// would consider itself due for renewal before it ever held a valid
    /// window.
    RenewBeforeNotShorterThanTtl { node: String },
    /// The same node id appeared more than once — an ambiguous identity.
    DuplicateNodeId { id: String },
    /// `quorum.width` is below [`MIN_QUORUM_WIDTH`].
    InvalidQuorumWidth { width: u32 },
}

impl InventoryError {
    /// Stable machine-readable tag for logs + JSON error envelopes.
    ///
    /// The tag depends only on the variant, never on its payload.
    #[must_use]
    pub fn tag(&self) -> &'static str {
        match self {
            Self::Parse(_) => "inventory_parse_error",
            Self::Io(_) => "inventory_io_error",
            Self::EmptyTrustDomain => "inventory_empty_trust_domain",
            Self::EmptyRegionName => "inventory_empty_region_name",
            Self::InvalidNodeId { .. } => "inventory_invalid_node_id",
            Self::InvalidDuration { .. } => "inventory_invalid_duration",
            Self::RenewBeforeNotShorterThanTtl { .. } => "inventory_renew_before_not_shorter",
            Self::DuplicateNodeId { .. } => "inventory_duplicate_node_id",
            Self::InvalidQuorumWidth { .. } => "inventory_invalid_quorum_width",
        }
    }
}

impl std::fmt::Display for InventoryError {
    /// Writes `<tag>` for payload-free variants and `<tag> (<details>)`
    /// for the rest.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let tag = self.tag();
        // Deliberately exhaustive (no `_` arm): adding a variant must fail
        // to compile here until its rendering has been decided, instead of
        // silently dropping the new variant's payload.
        match self {
            Self::Parse(msg) | Self::Io(msg) => write!(f, "{tag} ({msg})"),
            Self::InvalidNodeId { id, reason } => write!(f, "{tag} (id={id}: {reason})"),
            Self::InvalidDuration { node, field, value } => {
                write!(f, "{tag} (node={node} {field}={value})")
            }
            Self::RenewBeforeNotShorterThanTtl { node } => write!(f, "{tag} (node={node})"),
            Self::DuplicateNodeId { id } => write!(f, "{tag} (id={id})"),
            Self::InvalidQuorumWidth { width } => write!(f, "{tag} (width={width})"),
            Self::EmptyTrustDomain | Self::EmptyRegionName => f.write_str(tag),
        }
    }
}

impl std::error::Error for InventoryError {}
209
210impl FederationInventory {
211    /// Parse + validate an inventory from a YAML string.
212    ///
213    /// # Errors
214    /// [`InventoryError::Parse`] on malformed / unknown-field YAML, or any
215    /// of the semantic variants from [`validate`](Self::validate).
216    pub fn from_yaml_str(yaml: &str) -> Result<Self, InventoryError> {
217        let inventory: Self =
218            serde_yaml::from_str(yaml).map_err(|e| InventoryError::Parse(e.to_string()))?;
219        inventory.validate()?;
220        Ok(inventory)
221    }
222
223    /// Load + validate an inventory from a file path.
224    ///
225    /// # Errors
226    /// [`InventoryError::Io`] if the file cannot be read; otherwise the
227    /// same errors as [`from_yaml_str`](Self::from_yaml_str).
228    pub fn load_from_path(path: &Path) -> Result<Self, InventoryError> {
229        let raw = std::fs::read_to_string(path).map_err(|e| InventoryError::Io(e.to_string()))?;
230        Self::from_yaml_str(&raw)
231    }
232
233    /// Load the inventory named by [`FED_INVENTORY_PATH_ENV`].
234    ///
235    /// `Ok(None)` when the env var is unset — running without a declarative
236    /// inventory is the normal pre-adoption state, not an error.
237    ///
238    /// # Errors
239    /// The same errors as [`load_from_path`](Self::load_from_path) when the
240    /// env var IS set but the file is missing / malformed / invalid.
241    pub fn load_from_env() -> Result<Option<Self>, InventoryError> {
242        match std::env::var(FED_INVENTORY_PATH_ENV) {
243            Ok(path) => Self::load_from_path(Path::new(&path)).map(Some),
244            Err(_) => Ok(None),
245        }
246    }
247
248    /// Iterate every node across all regions.
249    pub fn nodes(&self) -> impl Iterator<Item = &NodeSpec> {
250        self.regions.iter().flat_map(|r| r.nodes.iter())
251    }
252
253    /// Semantic validation beyond what serde shape-checks: non-empty
254    /// trust domain + region names, valid node ids, parsable + sane
255    /// durations, unique ids, and a sane quorum width.
256    ///
257    /// # Errors
258    /// One of the semantic [`InventoryError`] variants on the first
259    /// violation found.
260    pub fn validate(&self) -> Result<(), InventoryError> {
261        if self.trust_domain.trim().is_empty() {
262            return Err(InventoryError::EmptyTrustDomain);
263        }
264        if self.quorum.width < MIN_QUORUM_WIDTH {
265            return Err(InventoryError::InvalidQuorumWidth {
266                width: self.quorum.width,
267            });
268        }
269        let mut seen_ids: BTreeSet<&str> = BTreeSet::new();
270        for region in &self.regions {
271            if region.name.trim().is_empty() {
272                return Err(InventoryError::EmptyRegionName);
273            }
274            for node in &region.nodes {
275                node.validate()?;
276                if !seen_ids.insert(node.id.as_str()) {
277                    return Err(InventoryError::DuplicateNodeId {
278                        id: node.id.clone(),
279                    });
280                }
281            }
282        }
283        Ok(())
284    }
285}
286
287impl NodeSpec {
288    /// The credential lifetime as a [`chrono::Duration`].
289    #[must_use]
290    pub fn cred_ttl_duration(&self) -> Option<chrono::Duration> {
291        crate::config::parse_duration_string(&self.cred_ttl)
292    }
293
294    /// The renewal lead window as a [`chrono::Duration`].
295    #[must_use]
296    pub fn renew_before_duration(&self) -> Option<chrono::Duration> {
297        crate::config::parse_duration_string(&self.renew_before)
298    }
299
300    /// Validate a single node's id + durations.
301    fn validate(&self) -> Result<(), InventoryError> {
302        crate::validate::validate_agent_id_shape(&self.id).map_err(|e| {
303            InventoryError::InvalidNodeId {
304                id: self.id.clone(),
305                reason: e.to_string(),
306            }
307        })?;
308        let ttl = positive_duration(self.cred_ttl_duration()).ok_or_else(|| {
309            InventoryError::InvalidDuration {
310                node: self.id.clone(),
311                field: "cred_ttl",
312                value: self.cred_ttl.clone(),
313            }
314        })?;
315        let renew = positive_duration(self.renew_before_duration()).ok_or_else(|| {
316            InventoryError::InvalidDuration {
317                node: self.id.clone(),
318                field: "renew_before",
319                value: self.renew_before.clone(),
320            }
321        })?;
322        if renew >= ttl {
323            return Err(InventoryError::RenewBeforeNotShorterThanTtl {
324                node: self.id.clone(),
325            });
326        }
327        Ok(())
328    }
329}
330
331/// `Some(d)` when `d` is present and strictly positive, else `None`. A
332/// zero/negative TTL or renewal window is meaningless and rejected.
333fn positive_duration(d: Option<chrono::Duration>) -> Option<chrono::Duration> {
334    d.filter(|d| *d > chrono::Duration::zero())
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated inventory: two regions, three nodes, both
    /// attestor spellings, an explicit quorum and enforcement block.
    const SAMPLE: &str = "\
trust_domain: fleet.example
root_ca: root.pub
regions:
  - name: nyc
    intermediate_ca: nyc-int.pub
    nodes:
      - id: region/nyc/node-1
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 15m
        roles: [writer, reader]
      - id: region/nyc/node-2
        attestor: node-plugin
        cred_ttl: 2h
        renew_before: 30m
  - name: sfo
    nodes:
      - id: region/sfo/node-1
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 10m
quorum:
  width: 2
enforcement:
  require_sig: true
";

    /// Happy path: every field of `SAMPLE` round-trips, and the duration
    /// accessors parse the raw strings.
    #[test]
    fn parses_and_validates_a_full_inventory() {
        let inv = FederationInventory::from_yaml_str(SAMPLE).expect("valid");
        assert_eq!(inv.trust_domain, "fleet.example");
        assert_eq!(inv.root_ca.as_deref(), Some("root.pub"));
        assert_eq!(inv.regions.len(), 2);
        assert_eq!(inv.quorum.width, 2);
        assert!(inv.enforcement.require_sig);
        assert_eq!(inv.nodes().count(), 3);
        let first = inv.nodes().next().expect("a node");
        assert_eq!(first.id, "region/nyc/node-1");
        assert_eq!(first.attestor, AttestorMethod::MtlsCert);
        assert_eq!(first.roles, vec!["writer", "reader"]);
        assert_eq!(first.cred_ttl_duration(), Some(chrono::Duration::hours(1)));
        assert_eq!(
            first.renew_before_duration(),
            Some(chrono::Duration::minutes(15))
        );
    }

    /// A minimal inventory (trust domain + quorum only) is valid and
    /// leaves enforcement at its permissive default.
    #[test]
    fn enforcement_defaults_to_permissive_when_omitted() {
        let yaml = "\
trust_domain: d
quorum:
  width: 1
";
        let inv = FederationInventory::from_yaml_str(yaml).expect("valid");
        assert!(!inv.enforcement.require_sig);
        assert_eq!(inv.nodes().count(), 0);
    }

    /// The motivating typo (`requir_sig`) must fail to parse rather than
    /// silently fall back to the default.
    #[test]
    fn unknown_field_is_a_hard_parse_error() {
        let yaml = "\
trust_domain: d
quorum:
  width: 1
enforcement:
  requir_sig: true
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("typo must fail");
        assert_eq!(err.tag(), "inventory_parse_error");
    }

    /// A whitespace-only trust domain counts as empty.
    #[test]
    fn empty_trust_domain_is_rejected() {
        let yaml = "\
trust_domain: '   '
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("empty domain");
        assert_eq!(err, InventoryError::EmptyTrustDomain);
    }

    /// A quorum width below `MIN_QUORUM_WIDTH` is rejected and the
    /// offending width is reported.
    #[test]
    fn zero_quorum_width_is_rejected() {
        let yaml = "\
trust_domain: d
quorum:
  width: 0
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("zero width");
        assert_eq!(err, InventoryError::InvalidQuorumWidth { width: 0 });
    }

    /// Node ids go through the shared agent-id shape check, so a
    /// path-traversal-looking id is refused.
    #[test]
    fn path_traversal_node_id_is_rejected() {
        let yaml = "\
trust_domain: d
regions:
  - name: r
    nodes:
      - id: ../../etc/secret
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 5m
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("traversal");
        assert_eq!(err.tag(), "inventory_invalid_node_id");
    }

    /// A `cred_ttl` that is not a duration is reported with the node id,
    /// the field name, and the raw value.
    #[test]
    fn unparsable_duration_is_rejected() {
        let yaml = "\
trust_domain: d
regions:
  - name: r
    nodes:
      - id: node-1
        attestor: mtls-cert
        cred_ttl: soon
        renew_before: 5m
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("bad ttl");
        assert_eq!(
            err,
            InventoryError::InvalidDuration {
                node: "node-1".to_string(),
                field: "cred_ttl",
                value: "soon".to_string(),
            }
        );
    }

    /// Boundary case: `renew_before` equal to `cred_ttl` is not "strictly
    /// shorter" and must be rejected.
    #[test]
    fn renew_before_not_shorter_than_ttl_is_rejected() {
        let yaml = "\
trust_domain: d
regions:
  - name: r
    nodes:
      - id: node-1
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 1h
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("renew>=ttl");
        assert_eq!(
            err,
            InventoryError::RenewBeforeNotShorterThanTtl {
                node: "node-1".to_string()
            }
        );
    }

    /// Uniqueness is fleet-wide: the same id in two different regions is
    /// still a duplicate.
    #[test]
    fn duplicate_node_id_across_regions_is_rejected() {
        let yaml = "\
trust_domain: d
regions:
  - name: r1
    nodes:
      - id: dup
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 5m
  - name: r2
    nodes:
      - id: dup
        attestor: mtls-cert
        cred_ttl: 1h
        renew_before: 5m
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("dup id");
        assert_eq!(
            err,
            InventoryError::DuplicateNodeId {
                id: "dup".to_string()
            }
        );
    }

    /// A whitespace-only region name counts as empty, even when the
    /// region has no nodes.
    #[test]
    fn empty_region_name_is_rejected() {
        let yaml = "\
trust_domain: d
regions:
  - name: '  '
    nodes: []
quorum:
  width: 1
";
        let err = FederationInventory::from_yaml_str(yaml).expect_err("empty region");
        assert_eq!(err, InventoryError::EmptyRegionName);
    }

    /// With the path variable removed, `load_from_env` reports "no
    /// inventory configured" rather than an error.
    #[test]
    fn load_from_env_unset_is_none() {
        // SAFETY: mutating the process environment is only sound while no
        // other thread is reading or writing it. No other test in this
        // module touches the environment.
        // NOTE(review): the default test harness runs tests on multiple
        // threads, so this also relies on no concurrently running test
        // elsewhere in the crate accessing the environment — confirm, or
        // serialise env-mutating tests.
        unsafe { std::env::remove_var(FED_INVENTORY_PATH_ENV) };
        assert_eq!(FederationInventory::load_from_env().expect("ok"), None);
    }
}