Skip to main content

cp_validator/
window.rs

//! Window-based coordination for validators.
//!
//! Per CP-015 section 3: validators coordinate through deterministic time
//! windows without blockchain synchronization. All validators in the same
//! hourly window independently select the same test queries from the
//! canonical corpus using a deterministic RNG seeded by `BLAKE3(window_id)`,
//! where the window ID is hashed as its 8 little-endian bytes.
7
8use crate::corpus::{TestCorpus, TestQuery};
9use rand::seq::SliceRandom;
10use rand::SeedableRng;
11use rand_chacha::ChaCha20Rng;
12use serde::{Deserialize, Serialize};
13
/// Length of one validation window, in seconds (60 minutes of 60 seconds).
const WINDOW_DURATION_SECS: u64 = 60 * 60;
16
17/// A deterministic validation window derived from the current time.
18///
19/// All validators observing the same wall clock hour will compute
20/// the same `window_id` and therefore select the same test queries.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct ValidationWindow {
23    /// Window identifier: `unix_timestamp_seconds` / 3600.
24    pub window_id: u64,
25    /// Start of the window (Unix seconds).
26    pub start_secs: u64,
27    /// End of the window (Unix seconds).
28    pub end_secs: u64,
29}
30
31impl ValidationWindow {
32    /// Compute the current validation window from the system clock.
33    pub fn current() -> Self {
34        let now_secs = std::time::SystemTime::now()
35            .duration_since(std::time::UNIX_EPOCH)
36            .unwrap()
37            .as_secs();
38        Self::from_timestamp(now_secs)
39    }
40
41    /// Compute the validation window for a given Unix timestamp (seconds).
42    pub fn from_timestamp(unix_secs: u64) -> Self {
43        let window_id = unix_secs / WINDOW_DURATION_SECS;
44        let start_secs = window_id * WINDOW_DURATION_SECS;
45        let end_secs = start_secs + WINDOW_DURATION_SECS;
46
47        Self {
48            window_id,
49            start_secs,
50            end_secs,
51        }
52    }
53
54    /// Compute the validation window for a given window ID directly.
55    pub fn from_window_id(window_id: u64) -> Self {
56        let start_secs = window_id * WINDOW_DURATION_SECS;
57        let end_secs = start_secs + WINDOW_DURATION_SECS;
58
59        Self {
60            window_id,
61            start_secs,
62            end_secs,
63        }
64    }
65
66    /// Create a deterministic RNG seeded by `BLAKE3(window_id)`.
67    ///
68    /// Per CP-015 section 3: all validators in the same window use the
69    /// same RNG to select test queries, ensuring independent agreement.
70    pub fn rng(&self) -> ChaCha20Rng {
71        let seed_hash = blake3::hash(&self.window_id.to_le_bytes());
72        let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
73        ChaCha20Rng::from_seed(seed_bytes)
74    }
75
76    /// Check if a given Unix timestamp (seconds) falls within this window.
77    pub fn contains(&self, unix_secs: u64) -> bool {
78        unix_secs >= self.start_secs && unix_secs < self.end_secs
79    }
80
81    /// Seconds remaining in this window from the given timestamp.
82    pub fn remaining_secs(&self, unix_secs: u64) -> u64 {
83        self.end_secs.saturating_sub(unix_secs)
84    }
85
86    /// Select test queries from the corpus for this window.
87    ///
88    /// Uses deterministic RNG so all validators independently arrive
89    /// at the same test set without communication.
90    pub fn select_test_queries(&self, corpus: &TestCorpus, count: usize) -> Vec<TestQuery> {
91        select_test_queries(self.window_id, &corpus.queries, count)
92    }
93}
94
95/// Select test queries from a corpus using a deterministic RNG seeded
96/// by the window ID.
97///
98/// Per CP-015 section 4: queries are shuffled deterministically using
99/// a `ChaCha20Rng` seeded by `BLAKE3(window_id)`, then the first `count`
100/// queries are taken.
101pub fn select_test_queries(
102    window_id: u64,
103    query_corpus: &[TestQuery],
104    count: usize,
105) -> Vec<TestQuery> {
106    if query_corpus.is_empty() || count == 0 {
107        return Vec::new();
108    }
109
110    let seed_hash = blake3::hash(&window_id.to_le_bytes());
111    let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
112    let mut rng = ChaCha20Rng::from_seed(seed_bytes);
113
114    let mut shuffled: Vec<TestQuery> = query_corpus.to_vec();
115    shuffled.shuffle(&mut rng);
116
117    shuffled.truncate(count);
118    shuffled
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use uuid::Uuid;

    /// Build `n` synthetic queries whose text, embedding and chunk ID are all
    /// derived from the query's index.
    fn make_corpus(n: usize) -> Vec<TestQuery> {
        let mut queries = Vec::with_capacity(n);
        for i in 0..n {
            queries.push(TestQuery {
                query_text: format!("test query {i}"),
                query_embedding: vec![i as i16; 10],
                relevant_chunk_ids: vec![Uuid::from_bytes([i as u8; 16])],
                relevance_grades: vec![1],
            });
        }
        queries
    }

    /// Collect the text of every query, in order.
    fn texts(queries: &[TestQuery]) -> Vec<&str> {
        queries.iter().map(|q| q.query_text.as_str()).collect()
    }

    /// Assert that every zipped pair of queries carries the same text.
    fn assert_same_texts_pairwise(left: &[TestQuery], right: &[TestQuery]) {
        for (l, r) in left.iter().zip(right) {
            assert_eq!(l.query_text, r.query_text);
        }
    }

    #[test]
    fn test_window_from_timestamp() {
        // Two full hours after the epoch is exactly the start of window 2.
        let window = ValidationWindow::from_timestamp(7200);
        let expected = ValidationWindow {
            window_id: 2,
            start_secs: 7200,
            end_secs: 10800,
        };
        assert_eq!(window, expected);
    }

    #[test]
    fn test_window_from_timestamp_mid_hour() {
        // Ninety minutes after the epoch lies in the middle of window 1.
        let window = ValidationWindow::from_timestamp(5400);
        let expected = ValidationWindow {
            window_id: 1,
            start_secs: 3600,
            end_secs: 7200,
        };
        assert_eq!(window, expected);
    }

    #[test]
    fn test_window_from_window_id() {
        let window = ValidationWindow::from_window_id(100);
        let expected = ValidationWindow {
            window_id: 100,
            start_secs: 360000,
            end_secs: 363600,
        };
        assert_eq!(window, expected);
    }

    #[test]
    fn test_window_contains() {
        let window = ValidationWindow::from_window_id(10);
        let cases = [
            (36000, true),  // Start of window
            (37800, true),  // Middle
            (39599, true),  // Just before end
            (39600, false), // End (exclusive)
            (35999, false), // Before start
        ];
        for (timestamp, inside) in cases {
            assert_eq!(window.contains(timestamp), inside, "timestamp {timestamp}");
        }
    }

    #[test]
    fn test_window_remaining_secs() {
        let window = ValidationWindow::from_window_id(10);
        let cases = [
            (36000, 3600), // Start: full hour
            (37800, 1800), // Middle: 30 min
            (39600, 0),    // End: none
            (40000, 0),    // Past end
        ];
        for (timestamp, remaining) in cases {
            assert_eq!(
                window.remaining_secs(timestamp),
                remaining,
                "timestamp {timestamp}"
            );
        }
    }

    #[test]
    fn test_rng_deterministic() {
        let window = ValidationWindow::from_window_id(42);

        // Two RNGs created from the same window must drive identical shuffles.
        let mut first = make_corpus(20);
        let mut second = first.clone();
        first.shuffle(&mut window.rng());
        second.shuffle(&mut window.rng());

        assert_same_texts_pairwise(&first, &second);
    }

    #[test]
    fn test_rng_different_windows_produce_different_sequences() {
        let mut first = make_corpus(20);
        let mut second = first.clone();

        first.shuffle(&mut ValidationWindow::from_window_id(42).rng());
        second.shuffle(&mut ValidationWindow::from_window_id(43).rng());

        // At least one query has to end up in a different position.
        let unchanged = first
            .iter()
            .zip(&second)
            .filter(|(a, b)| a.query_text == b.query_text)
            .count();
        assert!(
            unchanged < 20,
            "Different windows should produce different orderings"
        );
    }

    #[test]
    fn test_select_test_queries_count() {
        let picked = select_test_queries(100, &make_corpus(50), 10);
        assert_eq!(picked.len(), 10);
    }

    #[test]
    fn test_select_test_queries_count_exceeds_corpus() {
        // Asking for more than the corpus holds yields the whole corpus.
        let picked = select_test_queries(100, &make_corpus(5), 10);
        assert_eq!(picked.len(), 5);
    }

    #[test]
    fn test_select_test_queries_empty_corpus() {
        assert!(select_test_queries(100, &[], 10).is_empty());
    }

    #[test]
    fn test_select_test_queries_zero_count() {
        assert!(select_test_queries(100, &make_corpus(10), 0).is_empty());
    }

    #[test]
    fn test_select_test_queries_deterministic() {
        let corpus = make_corpus(30);

        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(42, &corpus, 10);

        assert_eq!(first.len(), second.len());
        assert_same_texts_pairwise(&first, &second);
    }

    #[test]
    fn test_select_test_queries_different_windows() {
        let corpus = make_corpus(30);

        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(43, &corpus, 10);

        // Adjacent windows are expected to pick different query sequences.
        assert_ne!(texts(&first), texts(&second));
    }

    #[test]
    fn test_window_boundary_consistency() {
        // Timestamps around an hour boundary must land in the right window.
        let cases = [(3599, 0), (3600, 1), (3601, 1)];
        for (timestamp, window_id) in cases {
            assert_eq!(
                ValidationWindow::from_timestamp(timestamp).window_id,
                window_id,
                "timestamp {timestamp}"
            );
        }
    }

    #[test]
    fn test_window_method_select_queries() {
        let corpus = TestCorpus {
            queries: make_corpus(30),
        };
        let via_method = ValidationWindow::from_window_id(42).select_test_queries(&corpus, 5);
        assert_eq!(via_method.len(), 5);

        // The method must agree with the standalone function.
        let via_function = select_test_queries(42, &corpus.queries, 5);
        assert_same_texts_pairwise(&via_method, &via_function);
    }
}