ff_core/caps.rs
1//! Backend-shared capability matching predicate (RFC-v0.7 Q7).
2//!
3//! **Why this module exists.** `ff-backend-valkey` and `ff-backend-postgres`
4//! both need to answer: "does worker W satisfy execution E's
5//! `required_capabilities`?" In Valkey that's a Rust short-circuit in the
6//! scheduler + authoritative Lua subset-check in `ff_issue_claim_grant`.
7//! In Postgres it's a `WHERE required_caps <@ worker_caps` GIN query.
8//! The *predicate* is identical across backends; only the storage
9//! (CSV field vs `text[]`) differs.
10//!
11//! This module owns the pure-Rust predicate so both backends share one
12//! definition + one test suite. The Valkey-side Lua (`lua/scheduling.lua`
13//! `parse_capability_csv` + `missing_capabilities`) remains the atomic
14//! authority inside the FCALL — this is a fast-path short-circuit.
15//!
16//! **Callers today:**
17//! - `ff-scheduler::claim` — Rust short-circuit before quota admission.
18//! - `ff-backend-postgres` (future) — direct predicate in the admission
19//! SQL / in-process query layer.
20//!
21//! **Wire shape.** `required_capabilities` is stored as a comma-separated
22//! CSV on the Valkey contract layer (`RoutingRequirements::required_capabilities`
23//! is serialized to CSV when written to the execution hash). Postgres will
24//! store as `text[]`. This module accepts both:
25//! - [`matches_csv`] — CSV-form required set (existing call site).
26//! - [`matches`] — structured [`CapabilityRequirement`] (future call sites).
27//!
28//! The predicate semantics are **case-sensitive subset**: every non-empty
29//! token in the required set must appear verbatim in the worker's
30//! [`CapabilitySet`]. An empty required set trivially matches any worker
31//! (backwards-compat default; see `RoutingRequirements` rustdoc).
32//!
33//! Bound constants ([`crate::policy::CAPS_MAX_BYTES`],
34//! [`crate::policy::CAPS_MAX_TOKENS`]) live in `ff-core::policy` and are
35//! enforced at ingress — not here. This module is a pure predicate.
36
37use crate::backend::CapabilitySet;
38use std::collections::BTreeSet;
39
/// The set of capability tokens an execution demands from a worker.
///
/// Wraps a `BTreeSet<String>`, so tokens are unique and iterate in sorted
/// order — that keeps the CSV rendering deterministic and log lines easy
/// to correlate. On the wire, Valkey carries the comma-joined form of
/// this set, while Postgres carries `tokens` projected as `text[]`.
///
/// Build one with [`CapabilityRequirement::new`], or parse the CSV form
/// with [`CapabilityRequirement::from_csv`]. Marked `#[non_exhaustive]`
/// so later additions (e.g. semver predicates, tier hints) stay additive.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct CapabilityRequirement {
    /// Tokens the worker must advertise; an empty set matches any worker.
    pub tokens: BTreeSet<String>,
}

impl CapabilityRequirement {
    /// Collects string-like tokens into a requirement.
    ///
    /// Empty strings are discarded: they cannot be satisfied by anything
    /// and would only add noise to the CSV form. A token supplied more
    /// than once is stored once.
    pub fn new<I, S>(tokens: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        let mut kept = BTreeSet::new();
        for raw in tokens {
            let token: String = raw.into();
            if !token.is_empty() {
                kept.insert(token);
            }
        }
        Self { tokens: kept }
    }

    /// Parses the Valkey wire form — tokens separated by commas.
    ///
    /// Follows Lua `parse_capability_csv` in `lua/scheduling.lua`: the
    /// empty pieces produced by input such as `",gpu,,cuda,"` are
    /// discarded. Token contents are not validated here; that is the
    /// responsibility of ingress (see `ff-scheduler::claim` and
    /// `ff-sdk::FlowFabricWorker::connect`).
    pub fn from_csv(csv: &str) -> Self {
        // `new` already discards the empty pieces that `split` yields for
        // leading, trailing, or doubled commas.
        Self::new(csv.split(','))
    }

    /// Returns `true` when nothing is required, i.e. any worker matches.
    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }
}
91
92/// Subset predicate: every required token appears in the worker's set.
93///
94/// Case-sensitive, exact-string match. Empty `required` → `true`. This
95/// is the pure-Rust mirror of Lua `missing_capabilities` in
96/// `lua/scheduling.lua`.
97///
98/// Called from:
99/// - `ff-scheduler::claim` (fast-path short-circuit before quota admission)
100/// - `ff-backend-postgres` (future; direct predicate in admission path)
101pub fn matches(required: &CapabilityRequirement, worker: &CapabilitySet) -> bool {
102 if required.is_empty() {
103 return true;
104 }
105 // `CapabilitySet` is `Vec<String>` today; linear scan is fine for the
106 // bounded sizes (CAPS_MAX_TOKENS = 256). If that changes, promote to
107 // a HashSet lookup here.
108 let worker_tokens: &[String] = &worker.tokens;
109 required
110 .tokens
111 .iter()
112 .all(|t| worker_tokens.iter().any(|w| w == t))
113}
114
/// Subset predicate over the CSV wire form. `ff-scheduler::claim` uses it
/// so the HGET result can be passed in as-is, with no parsing allocation.
///
/// Same semantics as [`matches`]: each non-empty comma-separated token of
/// `required_csv` must be present in `worker_caps`. An empty string, or a
/// string made only of separators, yields `true`.
///
/// This is deliberately its own entry point instead of going through
/// [`CapabilityRequirement::from_csv`] + [`matches`]: that route would
/// allocate a `BTreeSet` on the scheduler hot path, and the current call
/// site already holds the worker caps as a `&BTreeSet<String>`.
pub fn matches_csv(required_csv: &str, worker_caps: &BTreeSet<String>) -> bool {
    for token in required_csv.split(',') {
        // Leading, trailing, and doubled commas produce empty pieces that
        // carry no requirement.
        if token.is_empty() {
            continue;
        }
        if !worker_caps.contains(token) {
            return false;
        }
    }
    true
}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned worker capability set from string literals.
    fn worker_caps(tokens: &[&str]) -> BTreeSet<String> {
        tokens.iter().map(|t| (*t).to_owned()).collect()
    }

    // ── matches_csv (scheduler hot path) ──

    #[test]
    fn empty_required_csv_matches_any_worker() {
        // Nothing required: a worker with no caps and one with caps both pass.
        assert!(matches_csv("", &worker_caps(&[])));
        assert!(matches_csv("", &worker_caps(&["gpu"])));
    }

    #[test]
    fn all_separator_csv_matches_any_worker() {
        // Regression: ",,," must still parse as empty-required.
        assert!(matches_csv(",,,", &worker_caps(&["gpu"])));
    }

    #[test]
    fn exact_match_csv() {
        assert!(matches_csv("gpu,cuda", &worker_caps(&["gpu", "cuda"])));
    }

    #[test]
    fn subset_match_csv() {
        // The worker may advertise more than what is required.
        let worker = worker_caps(&["gpu", "cuda", "fp16"]);
        assert!(matches_csv("gpu,cuda", &worker));
        assert!(matches_csv("gpu", &worker));
    }

    #[test]
    fn missing_token_rejects_csv() {
        let worker = worker_caps(&["gpu"]);
        assert!(!matches_csv("gpu,cuda", &worker));
        assert!(!matches_csv("cuda", &worker));
    }

    #[test]
    fn case_sensitive_csv() {
        // Documented: matching is case-sensitive. "GPU" ≠ "gpu".
        // Ingress is expected to normalize if callers want case-insensitive.
        let worker = worker_caps(&["gpu"]);
        assert!(!matches_csv("GPU", &worker));
        assert!(matches_csv("gpu", &worker));
    }

    // ── matches (structured API) ──

    #[test]
    fn structured_empty_required_matches_any() {
        let nothing_required = CapabilityRequirement::default();
        assert!(matches(&nothing_required, &CapabilitySet::default()));
        assert!(matches(&nothing_required, &CapabilitySet::new(["gpu"])));
    }

    #[test]
    fn structured_subset_match() {
        let required = CapabilityRequirement::new(["gpu", "cuda"]);
        let offered = CapabilitySet::new(["gpu", "cuda", "fp16"]);
        assert!(matches(&required, &offered));
    }

    #[test]
    fn structured_missing_token_rejects() {
        let required = CapabilityRequirement::new(["gpu", "cuda"]);
        let offered = CapabilitySet::new(["gpu"]);
        assert!(!matches(&required, &offered));
    }

    #[test]
    fn structured_case_sensitive() {
        let required = CapabilityRequirement::new(["GPU"]);
        let offered = CapabilitySet::new(["gpu"]);
        assert!(!matches(&required, &offered));
    }

    #[test]
    fn from_csv_drops_empty_tokens() {
        let parsed = CapabilityRequirement::from_csv(",gpu,,cuda,");
        assert_eq!(parsed.tokens.len(), 2);
        assert!(parsed.tokens.contains("gpu"));
        assert!(parsed.tokens.contains("cuda"));
    }

    #[test]
    fn from_csv_empty_string_is_empty_requirement() {
        assert!(CapabilityRequirement::from_csv("").is_empty());
    }

    // ── cross-API equivalence ──

    #[test]
    fn matches_and_matches_csv_agree() {
        // Each row: (required CSV, worker tokens, expected verdict). Both
        // entry points must return the expected verdict for the same input.
        let cases = [
            ("", vec!["gpu"], true),
            ("gpu", vec!["gpu"], true),
            ("gpu,cuda", vec!["gpu"], false),
            ("gpu,cuda", vec!["gpu", "cuda", "fp16"], true),
            (",gpu,", vec!["gpu"], true),
            ("GPU", vec!["gpu"], false),
        ];
        for (req_csv, worker_tokens, expected) in cases {
            let via_csv = matches_csv(req_csv, &worker_caps(&worker_tokens));
            assert_eq!(via_csv, expected, "matches_csv({req_csv:?}) mismatch");

            let worker_set = CapabilitySet::new(worker_tokens.iter().copied());
            let via_struct = matches(&CapabilityRequirement::from_csv(req_csv), &worker_set);
            assert_eq!(via_struct, expected, "matches({req_csv:?}) mismatch");
        }
    }
}