1use rand::Rng;
2use rand::seq::{IndexedRandom, SliceRandom};
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6use crate::metadata::{METADATA_DELIMITER, MetadataKey};
7use crate::types::KvpValue;
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct KvpField {
12 key: String,
13 values: Vec<KvpValue>,
14 presence: f32,
15}
16
17impl KvpField {
18 pub fn one(key: impl Into<String>, value: impl Into<String>) -> Self {
20 Self::many(key, [value])
21 }
22
23 pub fn many<K, V, I>(key: K, values: I) -> Self
25 where
26 K: Into<String>,
27 I: IntoIterator<Item = V>,
28 V: Into<String>,
29 {
30 let mut seen = HashSet::new();
31 let mut collected = Vec::new();
32 for value in values.into_iter() {
33 let value = value.into();
34 if value.is_empty() {
35 continue;
36 }
37 if seen.insert(value.clone()) {
38 collected.push(value);
39 }
40 }
41 Self {
42 key: key.into(),
43 values: collected,
44 presence: 1.0,
45 }
46 }
47
48 pub fn with_presence(mut self, probability: f32) -> Self {
50 self.presence = probability.clamp(0.0, 1.0);
51 self
52 }
53
54 fn is_empty(&self) -> bool {
55 self.values.is_empty()
56 }
57
58 fn render<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<String> {
59 if self.presence <= 0.0 {
60 return None;
61 }
62 if self.presence < 1.0 && rng.random::<f32>() >= self.presence {
63 return None;
64 }
65 self.values
66 .choose(rng)
67 .map(|value| format!("{}{}{}", self.key, METADATA_DELIMITER, value))
68 }
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct KvpPrefixSampler {
74 dropout: f32,
75 variants: Vec<Vec<KvpField>>,
76}
77
78impl KvpPrefixSampler {
79 pub fn new(dropout: f32) -> Self {
81 Self {
82 dropout: dropout.clamp(0.0, 1.0),
83 variants: Vec::new(),
84 }
85 }
86
87 pub fn add_variant<K, V, I>(&mut self, fields: I)
89 where
90 I: IntoIterator<Item = (K, V)>,
91 K: Into<String>,
92 V: Into<String>,
93 {
94 let variant = fields
95 .into_iter()
96 .map(|(key, value)| KvpField::one(key, value))
97 .collect::<Vec<_>>();
98 self.add_variant_fields(variant);
99 }
100
101 pub fn add_variant_fields<I>(&mut self, fields: I)
103 where
104 I: IntoIterator<Item = KvpField>,
105 {
106 let mut variant = Vec::new();
107 for field in fields.into_iter() {
108 if !field.is_empty() {
109 variant.push(field);
110 }
111 }
112 if variant.is_empty() {
113 return;
114 }
115 self.variants.push(variant);
116 }
117
118 pub fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<String> {
120 if self.variants.is_empty() || self.dropout <= 0.0 {
121 return None;
122 }
123 if self.dropout < 1.0 && rng.random::<f32>() >= self.dropout {
124 return None;
125 }
126 self.variants
127 .choose(rng)
128 .and_then(|variant| format_variant(variant, rng))
129 }
130
131 pub fn all_metadata(&self) -> HashMap<String, Vec<String>> {
137 let mut map: HashMap<String, Vec<String>> = HashMap::new();
138 for variant in &self.variants {
139 for field in variant {
140 let entry = map.entry(field.key.clone()).or_default();
141 for value in &field.values {
142 if !entry.contains(value) {
143 entry.push(value.clone());
144 }
145 }
146 }
147 }
148 map
149 }
150}
151
152pub struct MetaFieldSpec<Ctx> {
154 key: MetadataKey,
155 presence: f32,
156 values_builder: fn(&Ctx) -> Vec<KvpValue>,
157}
158
159impl<Ctx> MetaFieldSpec<Ctx> {
160 pub const fn new(
162 key: MetadataKey,
163 presence: f32,
164 values_builder: fn(&Ctx) -> Vec<KvpValue>,
165 ) -> Self {
166 Self {
167 key,
168 presence,
169 values_builder,
170 }
171 }
172
173 fn build(&self, ctx: &Ctx) -> KvpField {
174 let values = (self.values_builder)(ctx);
175 KvpField::many(self.key.as_str(), values).with_presence(self.presence)
176 }
177}
178
179pub struct MetaPolicy<Ctx: 'static> {
181 fields: &'static [MetaFieldSpec<Ctx>],
182}
183
184impl<Ctx: 'static> MetaPolicy<Ctx> {
185 pub const fn new(fields: &'static [MetaFieldSpec<Ctx>]) -> Self {
187 Self { fields }
188 }
189
190 pub fn instantiate(&self, ctx: &Ctx) -> KvpPrefixSampler {
192 let built_fields = self
193 .fields
194 .iter()
195 .map(|field| field.build(ctx))
196 .collect::<Vec<_>>();
197 let mut sampler = KvpPrefixSampler::new(1.0);
198 sampler.add_variant_fields(built_fields);
199 sampler
200 }
201}
202
203fn format_variant<R: Rng + ?Sized>(fields: &[KvpField], rng: &mut R) -> Option<String> {
204 let mut body = Vec::new();
205 for field in fields {
206 if let Some(rendered) = field.render(rng) {
207 body.push(rendered);
208 }
209 }
210 if body.is_empty() {
211 return None;
212 }
213 if body.len() > 1 {
214 body.shuffle(rng);
215 }
216 Some(format!("meta: {}", body.join(" | ")))
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    /// Deterministic RNG whose 32-byte seed is filled with `byte`.
    fn seeded(byte: u8) -> StdRng {
        StdRng::from_seed([byte; 32])
    }

    #[test]
    fn respects_dropout_probability() {
        let mut emitting = KvpPrefixSampler::new(1.0);
        emitting.add_variant([("foo", "bar")]);
        let mut rng = seeded(0);
        assert_eq!(emitting.sample(&mut rng), Some("meta: foo=bar".into()));

        // A probability of zero never emits, even with a usable variant.
        let mut silent = KvpPrefixSampler::new(0.0);
        silent.add_variant([("foo", "bar")]);
        assert!(silent.sample(&mut rng).is_none());
    }

    #[test]
    fn chooses_between_variants() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant([("a", "1")]);
        sampler.add_variant([("b", "2")]);
        let mut rng = seeded(1);
        let picked = sampler.sample(&mut rng).unwrap();
        assert!(picked == "meta: a=1" || picked == "meta: b=2");
    }

    #[test]
    fn ignores_empty_variants() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant(Vec::<(&str, &str)>::new());
        let mut rng = seeded(2);
        assert!(sampler.sample(&mut rng).is_none());
    }

    #[test]
    fn field_value_options_are_deduped_and_randomized() {
        let field = KvpField::many("date", ["2025-01-01", "Jan 1, 2025", "2025-01-01"]);
        assert_eq!(field.key, "date");
        assert_eq!(field.values, ["2025-01-01", "Jan 1, 2025"]);

        let mut rng = seeded(3);
        let first = field.render(&mut rng).unwrap();
        let second = field.render(&mut rng).unwrap();
        for rendered in [first, second] {
            assert!(rendered == "date=2025-01-01" || rendered == "date=Jan 1, 2025");
        }
    }

    #[test]
    fn sampler_handles_multi_value_fields() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("article", "ceo-update"),
        ]);
        let mut rng = seeded(4);
        let mut outputs: Vec<String> = (0..20)
            .filter_map(|_| sampler.sample(&mut rng))
            .collect();
        outputs.sort();
        outputs.dedup();
        assert!(outputs.len() >= 2);

        // Both date spellings must show up alongside the fixed article field.
        let has_pair = |date: &str| {
            outputs
                .iter()
                .any(|line| line.contains(date) && line.contains("article=ceo-update"))
        };
        assert!(has_pair("date=2025-01-01"));
        assert!(has_pair("date=Jan 1, 2025"));
    }

    #[test]
    fn sampler_can_shuffle_field_order() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([KvpField::one("alpha", "1"), KvpField::one("beta", "2")]);
        let mut rng = seeded(5);
        let seen: std::collections::HashSet<String> = (0..20)
            .filter_map(|_| sampler.sample(&mut rng))
            .collect();
        assert!(seen.contains("meta: alpha=1 | beta=2"));
        assert!(seen.contains("meta: beta=2 | alpha=1"));
    }

    #[test]
    fn field_presence_controls_dropout() {
        let absent = KvpField::one("foo", "bar").with_presence(0.0);
        let mut rng = seeded(6);
        assert!(absent.render(&mut rng).is_none());

        let present = KvpField::one("foo", "bar").with_presence(1.0);
        let mut rng2 = seeded(7);
        assert_eq!(present.render(&mut rng2), Some("foo=bar".into()));
    }

    #[test]
    fn sampler_and_field_probabilities_are_clamped() {
        let mut always = KvpPrefixSampler::new(2.0);
        always.add_variant([("k", "v")]);
        let mut rng = seeded(8);
        assert!(always.sample(&mut rng).is_some());

        let mut never = KvpPrefixSampler::new(-1.0);
        never.add_variant([("k", "v")]);
        assert!(never.sample(&mut rng).is_none());

        let too_high = KvpField::one("a", "b").with_presence(2.0);
        assert_eq!(too_high.presence, 1.0);
        let too_low = KvpField::one("a", "b").with_presence(-5.0);
        assert_eq!(too_low.presence, 0.0);
    }

    #[test]
    fn variant_with_only_absent_fields_returns_none() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            // Never rendered because its presence is zero.
            KvpField::one("foo", "bar").with_presence(0.0),
            // Dropped on insertion because its only value is empty.
            KvpField::many("empty", [""]).with_presence(1.0),
        ]);
        let mut rng = seeded(9);
        assert!(sampler.sample(&mut rng).is_none());
    }

    /// Minimal context used to drive the policy tests.
    #[derive(Clone)]
    struct DemoCtx {
        date: &'static str,
        source: &'static str,
    }

    fn date_values(ctx: &DemoCtx) -> Vec<KvpValue> {
        vec![ctx.date.into()]
    }

    fn source_values(ctx: &DemoCtx) -> Vec<KvpValue> {
        vec![ctx.source.into()]
    }

    const DEMO_DATE_KEY: MetadataKey = MetadataKey::new("date");
    const DEMO_SOURCE_KEY: MetadataKey = MetadataKey::new("source");

    const POLICY_FIELDS: [MetaFieldSpec<DemoCtx>; 2] = [
        MetaFieldSpec::new(DEMO_DATE_KEY, 1.0, date_values),
        MetaFieldSpec::new(DEMO_SOURCE_KEY, 1.0, source_values),
    ];

    #[test]
    fn meta_policy_instantiates_sampler_with_context_values() {
        let ctx = DemoCtx {
            date: "2026-02-24",
            source: "reports",
        };
        let sampler = MetaPolicy::new(&POLICY_FIELDS).instantiate(&ctx);
        let mut rng = seeded(10);
        let out = sampler.sample(&mut rng).unwrap();
        assert!(out.contains("date=2026-02-24"));
        assert!(out.contains("source=reports"));
    }

    #[test]
    fn kvp_sampler_fractional_dropout_sometimes_suppresses_output() {
        // With a probability of one half, 100 draws should contain both outcomes.
        let mut sampler = KvpPrefixSampler::new(0.5);
        sampler.add_variant([("k", "v")]);
        let mut rng = seeded(77);
        let results: Vec<_> = (0..100).map(|_| sampler.sample(&mut rng)).collect();
        assert!(
            results.iter().any(Option::is_none),
            "dropout=0.5 should suppress some outputs"
        );
        assert!(
            results.iter().any(Option::is_some),
            "dropout=0.5 should pass some outputs"
        );
    }

    #[test]
    fn meta_field_spec_new_is_callable_at_runtime() {
        // `new` is a `const fn`; make sure it also works outside const context.
        fn values(_: &()) -> Vec<KvpValue> {
            vec!["runtime_val".to_string()]
        }
        let key = MetadataKey::new("runtime_key");
        let spec = MetaFieldSpec::<()>::new(key, 1.0, values);
        let field = spec.build(&());
        let mut rng = seeded(42);
        assert!(field.render(&mut rng).is_some());
    }

    #[test]
    fn all_metadata_empty_when_no_variants() {
        assert!(KvpPrefixSampler::new(1.0).all_metadata().is_empty());
    }

    #[test]
    fn all_metadata_collects_all_keys_and_values_regardless_of_dropout() {
        // Sampling would never emit here, but the metadata listing still must.
        let mut sampler = KvpPrefixSampler::new(0.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("source", "daily-report"),
        ]);

        let meta = sampler.all_metadata();
        assert_eq!(meta.len(), 2);

        let dates = &meta["date"];
        assert_eq!(dates.len(), 2);
        for expected in ["2025-01-01", "Jan 1, 2025"] {
            assert!(dates.contains(&expected.to_string()));
        }

        assert_eq!(meta["source"], vec!["daily-report"]);
    }

    #[test]
    fn all_metadata_collects_keys_across_variants_and_deduplicates_values() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("source", "variant-a"),
        ]);
        // "2025-01-01" repeats across variants and must be listed only once.
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "01/01/2025"]),
            KvpField::one("source", "variant-b"),
        ]);

        let meta = sampler.all_metadata();

        // Sort copies so the assertions do not depend on insertion order.
        let mut dates = meta["date"].clone();
        dates.sort();
        assert_eq!(dates, vec!["01/01/2025", "2025-01-01", "Jan 1, 2025"]);

        let mut sources = meta["source"].clone();
        sources.sort();
        assert_eq!(sources, vec!["variant-a", "variant-b"]);
    }

    #[test]
    fn all_metadata_ignores_field_presence_probability() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::one("always", "yes").with_presence(1.0),
            KvpField::one("never", "hidden").with_presence(0.0),
        ]);

        let meta = sampler.all_metadata();
        assert_eq!(meta["always"], vec!["yes"]);
        assert_eq!(meta["never"], vec!["hidden"]);
    }
}