Skip to main content

triplets_core/
kvp.rs

1use rand::Rng;
2use rand::seq::{IndexedRandom, SliceRandom};
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6use crate::metadata::{METADATA_DELIMITER, MetadataKey};
7use crate::types::KvpValue;
8
/// Represents a single key with one or more value renderings.
///
/// Rendering picks one entry of `values` at random and joins it to `key`
/// with `METADATA_DELIMITER`; `presence` decides how often the field is
/// rendered at all.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KvpField {
    /// Key emitted before the delimiter in the rendered output.
    key: String,
    /// Alternative renderings of the value; one is chosen per render.
    values: Vec<KvpValue>,
    /// Probability that the field is rendered (0.0 = never, 1.0 = always).
    presence: f32,
}
16
17impl KvpField {
18    /// Create a field with exactly one rendering option.
19    pub fn one(key: impl Into<String>, value: impl Into<String>) -> Self {
20        Self::many(key, [value])
21    }
22
23    /// Create a field with multiple rendering options (duplicates ignored, order preserved).
24    pub fn many<K, V, I>(key: K, values: I) -> Self
25    where
26        K: Into<String>,
27        I: IntoIterator<Item = V>,
28        V: Into<String>,
29    {
30        let mut seen = HashSet::new();
31        let mut collected = Vec::new();
32        for value in values.into_iter() {
33            let value = value.into();
34            if value.is_empty() {
35                continue;
36            }
37            if seen.insert(value.clone()) {
38                collected.push(value);
39            }
40        }
41        Self {
42            key: key.into(),
43            values: collected,
44            presence: 1.0,
45        }
46    }
47
48    /// Override how often this field should appear (0.0=never, 1.0=always).
49    pub fn with_presence(mut self, probability: f32) -> Self {
50        self.presence = probability.clamp(0.0, 1.0);
51        self
52    }
53
54    fn is_empty(&self) -> bool {
55        self.values.is_empty()
56    }
57
58    fn render<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<String> {
59        if self.presence <= 0.0 {
60            return None;
61        }
62        if self.presence < 1.0 && rng.random::<f32>() >= self.presence {
63            return None;
64        }
65        self.values
66            .choose(rng)
67            .map(|value| format!("{}{}{}", self.key, METADATA_DELIMITER, value))
68    }
69}
70
/// Samples key-value "meta" prefixes with optional dropout.
///
/// A sampler holds any number of variants; each variant is a list of fields
/// that are rendered together into one prefix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KvpPrefixSampler {
    /// Probability that `sample` emits a prefix. Despite the name, `0.0`
    /// means a prefix is never emitted and `1.0` means the emission roll
    /// never suppresses one.
    dropout: f32,
    /// Registered variants; one of them is chosen at random per sample.
    variants: Vec<Vec<KvpField>>,
}
77
78impl KvpPrefixSampler {
79    /// Create a new sampler that emits prefixes with the provided probability.
80    pub fn new(dropout: f32) -> Self {
81        Self {
82            dropout: dropout.clamp(0.0, 1.0),
83            variants: Vec::new(),
84        }
85    }
86
87    /// Register another variant using simple key-value pairs (single rendering per key).
88    pub fn add_variant<K, V, I>(&mut self, fields: I)
89    where
90        I: IntoIterator<Item = (K, V)>,
91        K: Into<String>,
92        V: Into<String>,
93    {
94        let variant = fields
95            .into_iter()
96            .map(|(key, value)| KvpField::one(key, value))
97            .collect::<Vec<_>>();
98        self.add_variant_fields(variant);
99    }
100
101    /// Register another variant that may contain multi-valued fields.
102    pub fn add_variant_fields<I>(&mut self, fields: I)
103    where
104        I: IntoIterator<Item = KvpField>,
105    {
106        let mut variant = Vec::new();
107        for field in fields.into_iter() {
108            if !field.is_empty() {
109                variant.push(field);
110            }
111        }
112        if variant.is_empty() {
113            return;
114        }
115        self.variants.push(variant);
116    }
117
118    /// Sample a formatted prefix using the configured dropout rate and variants.
119    pub fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<String> {
120        if self.variants.is_empty() || self.dropout <= 0.0 {
121            return None;
122        }
123        if self.dropout < 1.0 && rng.random::<f32>() >= self.dropout {
124            return None;
125        }
126        self.variants
127            .choose(rng)
128            .and_then(|variant| format_variant(variant, rng))
129    }
130
131    /// Returns all metadata keys and all their possible values across every variant.
132    ///
133    /// This method does **not** involve any RNG, presence rolls, or dropout — it simply
134    /// collects every key and every value defined on the sampler. Use the result to
135    /// populate `RecordChunk::kvp_meta` for downstream inspection/debugging.
136    pub fn all_metadata(&self) -> HashMap<String, Vec<String>> {
137        let mut map: HashMap<String, Vec<String>> = HashMap::new();
138        for variant in &self.variants {
139            for field in variant {
140                let entry = map.entry(field.key.clone()).or_default();
141                for value in &field.values {
142                    if !entry.contains(value) {
143                        entry.push(value.clone());
144                    }
145                }
146            }
147        }
148        map
149    }
150}
151
/// Describes how to build a metadata field for a given context (e.g. date).
pub struct MetaFieldSpec<Ctx> {
    /// Key of the field that gets built.
    key: MetadataKey,
    /// Presence probability applied to the built field.
    presence: f32,
    /// Produces the candidate value renderings from a context value.
    values_builder: fn(&Ctx) -> Vec<KvpValue>,
}
158
159impl<Ctx> MetaFieldSpec<Ctx> {
160    /// Create a field specification from key, presence probability, and value builder.
161    pub const fn new(
162        key: MetadataKey,
163        presence: f32,
164        values_builder: fn(&Ctx) -> Vec<KvpValue>,
165    ) -> Self {
166        Self {
167            key,
168            presence,
169            values_builder,
170        }
171    }
172
173    fn build(&self, ctx: &Ctx) -> KvpField {
174        let values = (self.values_builder)(ctx);
175        KvpField::many(self.key.as_str(), values).with_presence(self.presence)
176    }
177}
178
/// Holds the ordered list of metadata fields to emit for a source.
pub struct MetaPolicy<Ctx: 'static> {
    /// Field specs, in declaration order, borrowed for the `'static` lifetime.
    fields: &'static [MetaFieldSpec<Ctx>],
}
183
184impl<Ctx: 'static> MetaPolicy<Ctx> {
185    /// Create a policy from an ordered static list of field specs.
186    pub const fn new(fields: &'static [MetaFieldSpec<Ctx>]) -> Self {
187        Self { fields }
188    }
189
190    /// Instantiate a `KvpPrefixSampler` for one concrete context value.
191    pub fn instantiate(&self, ctx: &Ctx) -> KvpPrefixSampler {
192        let built_fields = self
193            .fields
194            .iter()
195            .map(|field| field.build(ctx))
196            .collect::<Vec<_>>();
197        let mut sampler = KvpPrefixSampler::new(1.0);
198        sampler.add_variant_fields(built_fields);
199        sampler
200    }
201}
202
203fn format_variant<R: Rng + ?Sized>(fields: &[KvpField], rng: &mut R) -> Option<String> {
204    let mut body = Vec::new();
205    for field in fields {
206        if let Some(rendered) = field.render(rng) {
207            body.push(rendered);
208        }
209    }
210    if body.is_empty() {
211        return None;
212    }
213    if body.len() > 1 {
214        body.shuffle(rng);
215    }
216    Some(format!("meta: {}", body.join(" | ")))
217}
218
/// Unit tests for field rendering, prefix sampling, policy instantiation and
/// metadata collection. All randomness comes from `StdRng` with fixed seeds.
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    /// Probability 1.0 emits the single variant; probability 0.0 emits nothing.
    #[test]
    fn respects_dropout_probability() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant([("foo", "bar")]);
        let mut rng = StdRng::from_seed([0_u8; 32]);
        assert_eq!(sampler.sample(&mut rng), Some("meta: foo=bar".into()));

        let mut zero_sampler = KvpPrefixSampler::new(0.0);
        zero_sampler.add_variant([("foo", "bar")]);
        assert!(zero_sampler.sample(&mut rng).is_none());
    }

    /// With two variants registered, a sample is one of the two renderings.
    #[test]
    fn chooses_between_variants() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant([("a", "1")]);
        sampler.add_variant([("b", "2")]);
        let mut rng = StdRng::from_seed([1_u8; 32]);
        let first = sampler.sample(&mut rng).unwrap();
        assert!(first == "meta: a=1" || first == "meta: b=2");
    }

    /// Registering a variant without fields leaves the sampler empty.
    #[test]
    fn ignores_empty_variants() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant([] as [(&str, &str); 0]);
        let mut rng = StdRng::from_seed([2_u8; 32]);
        assert!(sampler.sample(&mut rng).is_none());
    }

    /// Duplicate values collapse to one entry and rendering picks among the rest.
    #[test]
    fn field_value_options_are_deduped_and_randomized() {
        let field = KvpField::many("date", ["2025-01-01", "Jan 1, 2025", "2025-01-01"]);
        assert_eq!(field.key, "date");
        assert_eq!(field.values, vec!["2025-01-01", "Jan 1, 2025"]);

        let mut rng = StdRng::from_seed([3_u8; 32]);
        let first = field.render(&mut rng).unwrap();
        let second = field.render(&mut rng).unwrap();
        assert!(first == "date=2025-01-01" || first == "date=Jan 1, 2025");
        assert!(second == "date=2025-01-01" || second == "date=Jan 1, 2025");
    }

    /// Over repeated samples, both renderings of the multi-valued field show up.
    #[test]
    fn sampler_handles_multi_value_fields() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("article", "ceo-update"),
        ]);
        let mut rng = StdRng::from_seed([4_u8; 32]);
        let mut outputs = Vec::new();
        for _ in 0..20 {
            if let Some(sample) = sampler.sample(&mut rng) {
                outputs.push(sample);
            }
        }
        outputs.sort();
        outputs.dedup();
        assert!(outputs.len() >= 2);
        assert!(
            outputs
                .iter()
                .any(|value| value.contains("date=2025-01-01")
                    && value.contains("article=ceo-update"))
        );
        assert!(outputs.iter().any(
            |value| value.contains("date=Jan 1, 2025") && value.contains("article=ceo-update")
        ));
    }

    /// Over repeated samples, both orderings of a two-field variant show up.
    #[test]
    fn sampler_can_shuffle_field_order() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([KvpField::one("alpha", "1"), KvpField::one("beta", "2")]);
        let mut rng = StdRng::from_seed([5_u8; 32]);
        let mut seen = std::collections::HashSet::new();
        for _ in 0..20 {
            if let Some(sample) = sampler.sample(&mut rng) {
                seen.insert(sample);
            }
        }
        assert!(seen.contains("meta: alpha=1 | beta=2"));
        assert!(seen.contains("meta: beta=2 | alpha=1"));
    }

    /// Presence 0.0 never renders; presence 1.0 always renders.
    #[test]
    fn field_presence_controls_dropout() {
        let absent = KvpField::one("foo", "bar").with_presence(0.0);
        let mut rng = StdRng::from_seed([6_u8; 32]);
        assert!(absent.render(&mut rng).is_none());

        let present = KvpField::one("foo", "bar").with_presence(1.0);
        let mut rng2 = StdRng::from_seed([7_u8; 32]);
        assert_eq!(present.render(&mut rng2), Some("foo=bar".into()));
    }

    /// Out-of-range probabilities behave like the nearest bound of 0.0..=1.0.
    #[test]
    fn sampler_and_field_probabilities_are_clamped() {
        let mut always = KvpPrefixSampler::new(2.0);
        always.add_variant([("k", "v")]);
        let mut rng = StdRng::from_seed([8_u8; 32]);
        assert!(always.sample(&mut rng).is_some());

        let mut never = KvpPrefixSampler::new(-1.0);
        never.add_variant([("k", "v")]);
        assert!(never.sample(&mut rng).is_none());

        let field = KvpField::one("a", "b").with_presence(2.0);
        assert_eq!(field.presence, 1.0);
        let field = KvpField::one("a", "b").with_presence(-5.0);
        assert_eq!(field.presence, 0.0);
    }

    /// A variant whose only usable field has presence 0.0 yields no prefix;
    /// the second field has no non-empty value and is dropped on registration.
    #[test]
    fn variant_with_only_absent_fields_returns_none() {
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::one("foo", "bar").with_presence(0.0),
            KvpField::many("empty", [""]).with_presence(1.0),
        ]);
        let mut rng = StdRng::from_seed([9_u8; 32]);
        assert!(sampler.sample(&mut rng).is_none());
    }

    /// Minimal context type used to exercise `MetaPolicy`.
    #[derive(Clone)]
    struct DemoCtx {
        date: &'static str,
        source: &'static str,
    }

    /// Value builder returning the context's date as the only rendering.
    fn date_values(ctx: &DemoCtx) -> Vec<KvpValue> {
        vec![ctx.date.into()]
    }

    /// Value builder returning the context's source as the only rendering.
    fn source_values(ctx: &DemoCtx) -> Vec<KvpValue> {
        vec![ctx.source.into()]
    }

    const DEMO_DATE_KEY: MetadataKey = MetadataKey::new("date");
    const DEMO_SOURCE_KEY: MetadataKey = MetadataKey::new("source");

    // Both specs use presence 1.0 so the policy test output is deterministic.
    const POLICY_FIELDS: [MetaFieldSpec<DemoCtx>; 2] = [
        MetaFieldSpec::new(DEMO_DATE_KEY, 1.0, date_values),
        MetaFieldSpec::new(DEMO_SOURCE_KEY, 1.0, source_values),
    ];

    /// A policy instantiated for a context renders that context's values.
    #[test]
    fn meta_policy_instantiates_sampler_with_context_values() {
        let policy = MetaPolicy::new(&POLICY_FIELDS);
        let ctx = DemoCtx {
            date: "2026-02-24",
            source: "reports",
        };
        let sampler = policy.instantiate(&ctx);
        let mut rng = StdRng::from_seed([10_u8; 32]);
        let out = sampler.sample(&mut rng).unwrap();
        assert!(out.contains("date=2026-02-24"));
        assert!(out.contains("source=reports"));
    }

    /// A probability of 0.5 produces both emitted and suppressed samples.
    #[test]
    fn kvp_sampler_fractional_dropout_sometimes_suppresses_output() {
        // Covers the `0.0 < dropout < 1.0` branch in KvpPrefixSampler::sample.
        let mut sampler = KvpPrefixSampler::new(0.5);
        sampler.add_variant([("k", "v")]);
        let mut rng = StdRng::from_seed([77_u8; 32]);
        let results: Vec<_> = (0..100).map(|_| sampler.sample(&mut rng)).collect();
        assert!(
            results.iter().any(|r| r.is_none()),
            "dropout=0.5 should suppress some outputs"
        );
        assert!(
            results.iter().any(|r| r.is_some()),
            "dropout=0.5 should pass some outputs"
        );
    }

    /// `MetaFieldSpec::new` also works outside of a const context.
    #[test]
    fn meta_field_spec_new_is_callable_at_runtime() {
        // Call MetaFieldSpec::new() in a runtime (non-const) context so the
        // constructor body is instrumented by llvm-cov.
        fn values(_: &()) -> Vec<KvpValue> {
            vec!["runtime_val".to_string()]
        }
        let key = MetadataKey::new("runtime_key");
        let spec = MetaFieldSpec::<()>::new(key, 1.0, values);
        let field = spec.build(&());
        let mut rng = StdRng::from_seed([42_u8; 32]);
        assert!(field.render(&mut rng).is_some());
    }

    // ── all_metadata tests ────────────────────────────────────────────────────

    /// A sampler without variants reports no metadata.
    #[test]
    fn all_metadata_empty_when_no_variants() {
        let sampler = KvpPrefixSampler::new(1.0);
        assert!(sampler.all_metadata().is_empty());
    }

    /// Metadata collection is independent of the sampler's emission probability.
    #[test]
    fn all_metadata_collects_all_keys_and_values_regardless_of_dropout() {
        // dropout=0.0 means sample() always returns None, but all_metadata must
        // still expose every declared key and value.
        let mut sampler = KvpPrefixSampler::new(0.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("source", "daily-report"),
        ]);

        let meta = sampler.all_metadata();
        assert_eq!(meta.len(), 2);

        let dates = &meta["date"];
        assert_eq!(dates.len(), 2);
        assert!(dates.contains(&"2025-01-01".to_string()));
        assert!(dates.contains(&"Jan 1, 2025".to_string()));

        assert_eq!(meta["source"], vec!["daily-report"]);
    }

    /// Values for a key shared by several variants are merged without duplicates.
    #[test]
    fn all_metadata_collects_keys_across_variants_and_deduplicates_values() {
        // Two variants share the "date" key. all_metadata must merge both variants'
        // values under the same key without duplicates.
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "Jan 1, 2025"]),
            KvpField::one("source", "variant-a"),
        ]);
        sampler.add_variant_fields([
            KvpField::many("date", ["2025-01-01", "01/01/2025"]), // "2025-01-01" already seen
            KvpField::one("source", "variant-b"),
        ]);

        let meta = sampler.all_metadata();

        // "date" values from both variants, deduped
        let mut dates = meta["date"].clone();
        dates.sort();
        assert_eq!(dates, vec!["01/01/2025", "2025-01-01", "Jan 1, 2025"]);

        // "source" values from both variants
        let mut sources = meta["source"].clone();
        sources.sort();
        assert_eq!(sources, vec!["variant-a", "variant-b"]);
    }

    /// Metadata collection is independent of per-field presence.
    #[test]
    fn all_metadata_ignores_field_presence_probability() {
        // Fields with presence=0.0 are never sampled, but all_metadata should
        // still include their values.
        let mut sampler = KvpPrefixSampler::new(1.0);
        sampler.add_variant_fields([
            KvpField::one("always", "yes").with_presence(1.0),
            KvpField::one("never", "hidden").with_presence(0.0),
        ]);

        let meta = sampler.all_metadata();
        assert_eq!(meta["always"], vec!["yes"]);
        assert_eq!(meta["never"], vec!["hidden"]);
    }
}