Skip to main content

gbp_core/
bounded.rs

1//! LRU-bounded set for per-epoch deduplication.
2//!
3//! Each sub-protocol client (GTP, GSP, GAP) maintains a deduplication set
4//! that MUST NOT grow unboundedly within a single epoch. This type provides
5//! a `HashSet` with insertion-order eviction: when capacity is exceeded the
6//! oldest entry is dropped.
7
8use std::collections::{HashSet, VecDeque};
9use std::hash::Hash;
10
11/// A set with a bounded capacity that evicts the oldest entry on overflow.
12///
13/// Insertion (`insert`) returns `true` when the item is new and was added,
14/// `false` when it was already present. When `len() == cap`, the least
15/// recently inserted item is removed before the new item is added.
16#[derive(Clone, Debug)]
17pub struct BoundedSeen<T> {
18    set: HashSet<T>,
19    order: VecDeque<T>,
20    cap: usize,
21}
22
23impl<T: Eq + Hash + Clone> BoundedSeen<T> {
24    /// Creates a new set with the given capacity limit.
25    ///
26    /// N=10000 is the recommended default per epoch (GTP §5, GSP §5).
27    pub fn new(cap: usize) -> Self {
28        Self {
29            set: HashSet::with_capacity(cap.min(1024)),
30            order: VecDeque::with_capacity(cap.min(1024)),
31            cap,
32        }
33    }
34
35    /// Inserts an item. Returns `true` if the item is new and was added,
36    /// `false` if it was already present.
37    ///
38    /// When the set is at capacity, the oldest entry is evicted.
39    pub fn insert(&mut self, item: T) -> bool {
40        if !self.set.insert(item.clone()) {
41            return false;
42        }
43        self.order.push_back(item);
44        if self.order.len() > self.cap {
45            if let Some(old) = self.order.pop_front() {
46                self.set.remove(&old);
47            }
48        }
49        true
50    }
51
52    /// Returns the number of entries currently in the set.
53    pub fn len(&self) -> usize {
54        self.set.len()
55    }
56
57    /// Returns `true` if the set is empty.
58    pub fn is_empty(&self) -> bool {
59        self.set.is_empty()
60    }
61
62    /// Clears the set.
63    pub fn clear(&mut self) {
64        self.set.clear();
65        self.order.clear();
66    }
67}
68
69#[cfg(test)]
70mod tests {
71    use super::*;
72
73    #[test]
74    fn insert_new_returns_true() {
75        let mut s = BoundedSeen::new(10);
76        assert!(s.insert(1));
77    }
78
79    #[test]
80    fn insert_duplicate_returns_false() {
81        let mut s = BoundedSeen::new(10);
82        s.insert(1);
83        assert!(!s.insert(1));
84    }
85
86    #[test]
87    fn evicts_oldest_on_overflow() {
88        let mut s = BoundedSeen::new(3);
89        s.insert(1);
90        s.insert(2);
91        s.insert(3);
92        // At capacity; next insert evicts 1 (oldest).
93        s.insert(4);
94        // 1 was evicted, fresh again — but this evicts 2.
95        assert!(s.insert(1));
96        // 4 was second-newest, still present.
97        assert!(!s.insert(4));
98        assert_eq!(s.len(), 3);
99    }
100
101    #[test]
102    fn clear_resets() {
103        let mut s = BoundedSeen::new(100);
104        s.insert("a");
105        s.clear();
106        assert!(s.is_empty());
107        assert!(s.insert("a")); // fresh after clear
108    }
109}