Skip to main content

oximedia_analytics/
ctr.rs

1//! Click-through rate (CTR) tracking for thumbnails and content previews.
2//!
3//! Tracks impression and click events per item (thumbnail, preview card, etc.)
4//! and computes click-through rates, confidence intervals, and performance
5//! rankings across a catalogue of content items.
6//!
7//! ## Definitions
8//!
9//! - **Impression** — an item was displayed to a viewer.
10//! - **Click** — a viewer interacted with (clicked/tapped) the item.
11//! - **CTR** — `clicks / impressions` expressed as a fraction in `[0.0, 1.0]`.
12//!
13//! Wilson score confidence intervals are used for the CTR bounds because they
14//! remain valid even for small sample sizes and extreme rates (near 0 or 1)
15//! unlike the normal approximation.
16//!
17//! ## Example
18//!
19//! ```rust
20//! use oximedia_analytics::ctr::{CtrTracker, CtrVariant};
21//!
22//! let mut tracker = CtrTracker::new();
23//! tracker.record_impression("thumb_a");
24//! tracker.record_impression("thumb_a");
25//! tracker.record_click("thumb_a");
26//!
27//! let stats = tracker.stats("thumb_a").unwrap();
28//! assert_eq!(stats.impressions, 2);
29//! assert_eq!(stats.clicks, 1);
30//! assert!((stats.ctr() - 0.5).abs() < 1e-6);
31//! ```
32
33use crate::error::AnalyticsError;
34use std::collections::HashMap;
35
36// ─── CtrStats ────────────────────────────────────────────────────────────────
37
/// Raw impression/click counts and derived metrics for a single item.
///
/// The two counters are stored independently, so `clicks` may exceed
/// `impressions` (for example when clicks are recorded for an item that was
/// never displayed).  The derived metrics treat such surplus clicks as
/// saturating the rate at `1.0` so that every reported proportion stays
/// inside `[0.0, 1.0]`.
#[derive(Debug, Clone, PartialEq)]
pub struct CtrStats {
    /// The item identifier (e.g. thumbnail slug or content ID).
    pub item_id: String,
    /// Total number of times this item was shown to viewers.
    pub impressions: u64,
    /// Total number of times viewers clicked/tapped this item.
    pub clicks: u64,
}

impl CtrStats {
    /// Computes the raw click-through rate: `clicks / impressions`.
    ///
    /// The result is always a fraction in `[0.0, 1.0]`:
    ///
    /// - `0.0` is returned when `impressions == 0`;
    /// - `1.0` is returned when `clicks >= impressions`.
    #[must_use]
    pub fn ctr(&self) -> f64 {
        if self.impressions == 0 {
            return 0.0;
        }
        // Clamp in the integer domain, before converting to floating point,
        // so the quotient can never round to a value above 1.0.
        let effective_clicks = self.clicks.min(self.impressions);
        effective_clicks as f64 / self.impressions as f64
    }

    /// Computes the Wilson score confidence interval for the CTR.
    ///
    /// Returns `(lower, upper)` bounds at the given `z` level.  Common values:
    /// - `z = 1.645` → 90 % confidence
    /// - `z = 1.960` → 95 % confidence
    /// - `z = 2.576` → 99 % confidence
    ///
    /// Only the magnitude of `z` is used, so a negative `z` produces the same
    /// interval as its absolute value and `lower <= upper` always holds for
    /// finite `z`.
    ///
    /// The observed proportion is the clamped value returned by
    /// [`CtrStats::ctr`], which keeps the variance term non-negative even when
    /// `clicks > impressions`.
    ///
    /// Returns `(0.0, 0.0)` when `impressions == 0`.
    ///
    /// # Reference
    ///
    /// Wilson, E. B. (1927). Probable inference, the law of succession, and
    /// statistical inference. *Journal of the American Statistical Association*,
    /// 22(158), 209-212.
    #[must_use]
    pub fn wilson_interval(&self, z: f64) -> (f64, f64) {
        if self.impressions == 0 {
            return (0.0, 0.0);
        }
        let n = self.impressions as f64;
        // Clamped proportion: guarantees p * (1 - p) >= 0 under the sqrt.
        let p = self.ctr();
        // The interval is symmetric in the sign of z; normalising here stops
        // a negative z from swapping the two bounds.
        let z = z.abs();
        let z2 = z * z;
        let denom = 1.0 + z2 / n;
        let centre = (p + z2 / (2.0 * n)) / denom;
        let margin = (z / denom) * (p * (1.0 - p) / n + z2 / (4.0 * n * n)).sqrt();
        // Rounding can push the bounds a hair outside [0, 1]; clip them back.
        ((centre - margin).max(0.0), (centre + margin).min(1.0))
    }

    /// Returns `true` when this item has never been shown
    /// (`impressions == 0`), regardless of how many clicks were recorded.
    #[must_use]
    pub fn is_untracked(&self) -> bool {
        self.impressions == 0
    }
}
96
97// ─── CtrVariant ──────────────────────────────────────────────────────────────
98
/// Snapshot of one item's click-through performance, used for ranking and
/// A/B reporting.
///
/// All fields are plain values, so a `CtrVariant` can be cloned, compared and
/// handed to reporting code without any reference back to its source.
#[derive(Debug, Clone, PartialEq)]
pub struct CtrVariant {
    /// Identifier of the item this summary describes.
    pub item_id: String,
    /// Observed click-through rate, as a fraction in `[0.0, 1.0]`.
    pub ctr: f64,
    /// Lower end of the Wilson score interval at 95 % confidence.
    pub ci_lower: f64,
    /// Upper end of the Wilson score interval at 95 % confidence.
    pub ci_upper: f64,
    /// Number of impressions the summary is based on.
    pub impressions: u64,
    /// Number of clicks the summary is based on.
    pub clicks: u64,
}
115
116// ─── CtrTracker ──────────────────────────────────────────────────────────────
117
118/// Tracks CTR metrics for an arbitrary number of items.
119///
120/// Each item is identified by a `&str` key.  Items are created on first
121/// impression or click — there is no need to pre-register them.
122#[derive(Debug, Default, Clone)]
123pub struct CtrTracker {
124    data: HashMap<String, CtrStats>,
125}
126
127impl CtrTracker {
128    /// Creates an empty `CtrTracker`.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            data: HashMap::new(),
133        }
134    }
135
136    /// Records one impression for `item_id`.
137    ///
138    /// If the item is not yet tracked it is created with zero counts first.
139    pub fn record_impression(&mut self, item_id: &str) {
140        let entry = self
141            .data
142            .entry(item_id.to_owned())
143            .or_insert_with(|| CtrStats {
144                item_id: item_id.to_owned(),
145                impressions: 0,
146                clicks: 0,
147            });
148        entry.impressions = entry.impressions.saturating_add(1);
149    }
150
151    /// Records `count` impressions for `item_id` in a single call.
152    ///
153    /// Useful for bulk-ingesting impression counts from a reporting pipeline.
154    pub fn record_impressions(&mut self, item_id: &str, count: u64) {
155        let entry = self
156            .data
157            .entry(item_id.to_owned())
158            .or_insert_with(|| CtrStats {
159                item_id: item_id.to_owned(),
160                impressions: 0,
161                clicks: 0,
162            });
163        entry.impressions = entry.impressions.saturating_add(count);
164    }
165
166    /// Records one click for `item_id`.
167    ///
168    /// If the item is not yet tracked it is created with zero counts first.
169    /// Clicks without a preceding impression are allowed (e.g. deep links).
170    pub fn record_click(&mut self, item_id: &str) {
171        let entry = self
172            .data
173            .entry(item_id.to_owned())
174            .or_insert_with(|| CtrStats {
175                item_id: item_id.to_owned(),
176                impressions: 0,
177                clicks: 0,
178            });
179        entry.clicks = entry.clicks.saturating_add(1);
180    }
181
182    /// Records `count` clicks for `item_id` in a single call.
183    pub fn record_clicks(&mut self, item_id: &str, count: u64) {
184        let entry = self
185            .data
186            .entry(item_id.to_owned())
187            .or_insert_with(|| CtrStats {
188                item_id: item_id.to_owned(),
189                impressions: 0,
190                clicks: 0,
191            });
192        entry.clicks = entry.clicks.saturating_add(count);
193    }
194
195    /// Returns the [`CtrStats`] for `item_id`, or `None` if it has never been
196    /// seen.
197    #[must_use]
198    pub fn stats(&self, item_id: &str) -> Option<&CtrStats> {
199        self.data.get(item_id)
200    }
201
202    /// Returns the raw CTR for `item_id`.
203    ///
204    /// # Errors
205    ///
206    /// Returns [`AnalyticsError::InvalidInput`] when `item_id` is not tracked.
207    pub fn ctr(&self, item_id: &str) -> Result<f64, AnalyticsError> {
208        self.data
209            .get(item_id)
210            .map(CtrStats::ctr)
211            .ok_or_else(|| AnalyticsError::InvalidInput(format!("item '{item_id}' not tracked")))
212    }
213
214    /// Returns items ranked by CTR descending.
215    ///
216    /// Items with no impressions are placed at the bottom (CTR = 0).  Items
217    /// with equal CTR are ordered by total impressions descending (more data
218    /// first).
219    #[must_use]
220    pub fn ranked(&self) -> Vec<CtrVariant> {
221        let z = 1.960_f64; // 95 % confidence
222        let mut variants: Vec<CtrVariant> = self
223            .data
224            .values()
225            .map(|s| {
226                let (lo, hi) = s.wilson_interval(z);
227                CtrVariant {
228                    item_id: s.item_id.clone(),
229                    ctr: s.ctr(),
230                    ci_lower: lo,
231                    ci_upper: hi,
232                    impressions: s.impressions,
233                    clicks: s.clicks,
234                }
235            })
236            .collect();
237
238        // Primary sort: CTR descending.  Secondary: impressions descending.
239        variants.sort_by(|a, b| {
240            b.ctr
241                .partial_cmp(&a.ctr)
242                .unwrap_or(std::cmp::Ordering::Equal)
243                .then_with(|| b.impressions.cmp(&a.impressions))
244        });
245
246        variants
247    }
248
249    /// Returns the item with the highest CTR among those with at least
250    /// `min_impressions` impressions.
251    ///
252    /// # Errors
253    ///
254    /// Returns [`AnalyticsError::InsufficientData`] when no item meets the
255    /// `min_impressions` threshold.
256    pub fn winner(&self, min_impressions: u64) -> Result<CtrVariant, AnalyticsError> {
257        let z = 1.960_f64;
258        self.data
259            .values()
260            .filter(|s| s.impressions >= min_impressions)
261            .max_by(|a, b| {
262                a.ctr()
263                    .partial_cmp(&b.ctr())
264                    .unwrap_or(std::cmp::Ordering::Equal)
265            })
266            .map(|s| {
267                let (lo, hi) = s.wilson_interval(z);
268                CtrVariant {
269                    item_id: s.item_id.clone(),
270                    ctr: s.ctr(),
271                    ci_lower: lo,
272                    ci_upper: hi,
273                    impressions: s.impressions,
274                    clicks: s.clicks,
275                }
276            })
277            .ok_or_else(|| {
278                AnalyticsError::InsufficientData(format!(
279                    "no item has ≥ {min_impressions} impressions"
280                ))
281            })
282    }
283
284    /// Returns the total number of items currently being tracked.
285    #[must_use]
286    pub fn item_count(&self) -> usize {
287        self.data.len()
288    }
289
290    /// Clears all tracking data.
291    pub fn reset(&mut self) {
292        self.data.clear();
293    }
294}
295
296// ─── Tests ────────────────────────────────────────────────────────────────────
297
#[cfg(test)]
mod tests {
    use super::*;

    /// z-score for a 95 % confidence level, shared by the interval tests.
    const Z95: f64 = 1.960;

    /// Builds a standalone [`CtrStats`] with the given counters.
    fn stats_of(item_id: &str, impressions: u64, clicks: u64) -> CtrStats {
        CtrStats {
            item_id: item_id.into(),
            impressions,
            clicks,
        }
    }

    /// Builds a tracker pre-loaded with `(item, impressions, clicks)` rows,
    /// recording impressions before clicks for each row.
    fn tracker_with(rows: &[(&str, u64, u64)]) -> CtrTracker {
        let mut tracker = CtrTracker::new();
        for &(item_id, impressions, clicks) in rows {
            tracker.record_impressions(item_id, impressions);
            tracker.record_clicks(item_id, clicks);
        }
        tracker
    }

    // ── CtrStats ──────────────────────────────────────────────────────────────

    #[test]
    fn ctr_zero_when_no_impressions() {
        let untouched = stats_of("x", 0, 0);
        assert_eq!(untouched.ctr(), 0.0);
        assert!(untouched.is_untracked());
    }

    #[test]
    fn ctr_computed_correctly() {
        let rate = stats_of("a", 4, 1).ctr();
        assert!((rate - 0.25).abs() < 1e-9);
    }

    #[test]
    fn wilson_interval_symmetry_at_50_percent() {
        // At exactly 50 % CTR (50 of 100) the two bounds should sit at
        // roughly the same distance from 0.5.
        let (lo, hi) = stats_of("sym", 100, 50).wilson_interval(Z95);
        let below = 0.5 - lo;
        let above = hi - 0.5;
        assert!((below - above).abs() < 0.01, "lo={lo:.4}, hi={hi:.4}");
    }

    #[test]
    fn wilson_interval_empty_item() {
        let interval = stats_of("empty", 0, 0).wilson_interval(Z95);
        assert_eq!(interval, (0.0, 0.0));
    }

    #[test]
    fn wilson_interval_bounds_in_range() {
        let (lo, hi) = stats_of("b", 200, 30).wilson_interval(Z95);
        assert!((0.0..=1.0).contains(&lo), "lower={lo}");
        assert!((0.0..=1.0).contains(&hi), "upper={hi}");
        assert!(lo < hi, "lower must be < upper");
    }

    // ── CtrTracker ────────────────────────────────────────────────────────────

    #[test]
    fn record_impression_and_click() {
        let mut tracker = CtrTracker::new();
        tracker.record_impression("thumb_a");
        tracker.record_impression("thumb_a");
        tracker.record_click("thumb_a");

        let recorded = tracker.stats("thumb_a").expect("exists");
        assert_eq!((recorded.impressions, recorded.clicks), (2, 1));
        assert!((recorded.ctr() - 0.5).abs() < 1e-9);
    }

    #[test]
    fn stats_unknown_item_returns_none() {
        assert!(CtrTracker::new().stats("unknown").is_none());
    }

    #[test]
    fn ctr_unknown_item_errors() {
        assert!(CtrTracker::new().ctr("ghost").is_err());
    }

    #[test]
    fn ranked_orders_by_ctr_descending() {
        // "low" converts at 5 %, "high" at 20 %, both over 100 impressions.
        let tracker = tracker_with(&[("low", 100, 5), ("high", 100, 20)]);

        let order: Vec<String> = tracker.ranked().into_iter().map(|v| v.item_id).collect();
        assert_eq!(order[0], "high");
        assert_eq!(order[1], "low");
    }

    #[test]
    fn winner_selects_best_item() {
        let tracker = tracker_with(&[("a", 1000, 50), ("b", 1000, 200)]);

        let best = tracker.winner(100).expect("winner found");
        assert_eq!(best.item_id, "b");
        assert!((best.ctr - 0.2).abs() < 1e-9);
    }

    #[test]
    fn winner_errors_when_min_impressions_not_met() {
        let tracker = tracker_with(&[("tiny", 5, 1)]);
        assert!(tracker.winner(100).is_err());
    }

    #[test]
    fn item_count_and_reset() {
        let mut tracker = CtrTracker::new();
        for item_id in ["x", "y"].iter() {
            tracker.record_impression(item_id);
        }
        assert_eq!(tracker.item_count(), 2);

        tracker.reset();
        assert_eq!(tracker.item_count(), 0);
    }

    #[test]
    fn bulk_impression_and_click_recording() {
        let tracker = tracker_with(&[("bulk", 500, 100)]);

        let recorded = tracker.stats("bulk").expect("exists");
        assert_eq!(recorded.impressions, 500);
        assert_eq!(recorded.clicks, 100);
        assert!((recorded.ctr() - 0.2).abs() < 1e-9);
    }

    #[test]
    fn click_without_impression_allowed() {
        // A deep link can produce a click for an item that was never shown.
        let mut tracker = CtrTracker::new();
        tracker.record_click("deep_link");

        let recorded = tracker.stats("deep_link").expect("exists");
        assert_eq!(recorded.impressions, 0);
        assert_eq!(recorded.clicks, 1);
        // No impressions means the rate is reported as zero.
        assert_eq!(recorded.ctr(), 0.0);
    }

    #[test]
    fn ranked_ci_bounds_populated() {
        let ranking = tracker_with(&[("item", 200, 40)]).ranked();
        assert_eq!(ranking.len(), 1);

        let only = &ranking[0];
        assert!(only.ci_lower < only.ctr);
        assert!(only.ci_upper > only.ctr);
    }
}