Skip to main content

cp_validator/
window.rs

1//! Window-based coordination for validators.
2//!
3//! Per CP-015 section 3: Validators coordinate through deterministic time
4//! windows without blockchain synchronization. All validators in the same
5//! hourly window independently select the same test queries from the
6//! canonical corpus using a deterministic RNG seeded by BLAKE3(window_id).
7
8use crate::corpus::{TestCorpus, TestQuery};
9use rand::seq::SliceRandom;
10use rand::SeedableRng;
11use rand_chacha::ChaCha20Rng;
12use serde::{Deserialize, Serialize};
13
/// Length of a single validation window, expressed in seconds (one hour).
const WINDOW_DURATION_SECS: u64 = 60 * 60;
16
17/// A deterministic validation window derived from the current time.
18///
19/// All validators observing the same wall clock hour will compute
20/// the same window_id and therefore select the same test queries.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct ValidationWindow {
23    /// Window identifier: unix_timestamp_seconds / 3600.
24    pub window_id: u64,
25    /// Start of the window (Unix seconds).
26    pub start_secs: u64,
27    /// End of the window (Unix seconds).
28    pub end_secs: u64,
29}
30
31impl ValidationWindow {
32    /// Compute the current validation window from the system clock.
33    pub fn current() -> Self {
34        let now_secs = std::time::SystemTime::now()
35            .duration_since(std::time::UNIX_EPOCH)
36            .unwrap()
37            .as_secs();
38        Self::from_timestamp(now_secs)
39    }
40
41    /// Compute the validation window for a given Unix timestamp (seconds).
42    pub fn from_timestamp(unix_secs: u64) -> Self {
43        let window_id = unix_secs / WINDOW_DURATION_SECS;
44        let start_secs = window_id * WINDOW_DURATION_SECS;
45        let end_secs = start_secs + WINDOW_DURATION_SECS;
46
47        Self {
48            window_id,
49            start_secs,
50            end_secs,
51        }
52    }
53
54    /// Compute the validation window for a given window ID directly.
55    pub fn from_window_id(window_id: u64) -> Self {
56        let start_secs = window_id * WINDOW_DURATION_SECS;
57        let end_secs = start_secs + WINDOW_DURATION_SECS;
58
59        Self {
60            window_id,
61            start_secs,
62            end_secs,
63        }
64    }
65
66    /// Create a deterministic RNG seeded by BLAKE3(window_id).
67    ///
68    /// Per CP-015 section 3: all validators in the same window use the
69    /// same RNG to select test queries, ensuring independent agreement.
70    pub fn rng(&self) -> ChaCha20Rng {
71        let seed_hash = blake3::hash(&self.window_id.to_le_bytes());
72        let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
73        ChaCha20Rng::from_seed(seed_bytes)
74    }
75
76    /// Check if a given Unix timestamp (seconds) falls within this window.
77    pub fn contains(&self, unix_secs: u64) -> bool {
78        unix_secs >= self.start_secs && unix_secs < self.end_secs
79    }
80
81    /// Seconds remaining in this window from the given timestamp.
82    pub fn remaining_secs(&self, unix_secs: u64) -> u64 {
83        if unix_secs >= self.end_secs {
84            0
85        } else {
86            self.end_secs - unix_secs
87        }
88    }
89
90    /// Select test queries from the corpus for this window.
91    ///
92    /// Uses deterministic RNG so all validators independently arrive
93    /// at the same test set without communication.
94    pub fn select_test_queries(
95        &self,
96        corpus: &TestCorpus,
97        count: usize,
98    ) -> Vec<TestQuery> {
99        select_test_queries(self.window_id, &corpus.queries, count)
100    }
101}
102
103/// Select test queries from a corpus using a deterministic RNG seeded
104/// by the window ID.
105///
106/// Per CP-015 section 4: queries are shuffled deterministically using
107/// a ChaCha20Rng seeded by BLAKE3(window_id), then the first `count`
108/// queries are taken.
109pub fn select_test_queries(
110    window_id: u64,
111    query_corpus: &[TestQuery],
112    count: usize,
113) -> Vec<TestQuery> {
114    if query_corpus.is_empty() || count == 0 {
115        return Vec::new();
116    }
117
118    let seed_hash = blake3::hash(&window_id.to_le_bytes());
119    let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
120    let mut rng = ChaCha20Rng::from_seed(seed_bytes);
121
122    let mut shuffled: Vec<TestQuery> = query_corpus.to_vec();
123    shuffled.shuffle(&mut rng);
124
125    shuffled.truncate(count);
126    shuffled
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use uuid::Uuid;

    /// Build `n` synthetic queries whose text, embedding and chunk ID are
    /// all derived from the query's index.
    fn make_corpus(n: usize) -> Vec<TestQuery> {
        let mut queries = Vec::with_capacity(n);
        for i in 0..n {
            queries.push(TestQuery {
                query_text: format!("test query {}", i),
                query_embedding: vec![i as i16; 10],
                relevant_chunk_ids: vec![Uuid::from_bytes([i as u8; 16])],
                relevance_grades: vec![1],
            });
        }
        queries
    }

    /// Project queries onto their texts for order-sensitive comparison.
    fn texts(queries: &[TestQuery]) -> Vec<&str> {
        queries.iter().map(|q| q.query_text.as_str()).collect()
    }

    /// Assert that two query lists agree on text at every shared position.
    fn assert_pairwise_same_text(left: &[TestQuery], right: &[TestQuery]) {
        for (l, r) in left.iter().zip(right) {
            assert_eq!(l.query_text, r.query_text);
        }
    }

    #[test]
    fn test_window_from_timestamp() {
        // 7200 seconds is exactly two hours after the epoch.
        let expected = ValidationWindow {
            window_id: 2,
            start_secs: 7200,
            end_secs: 10800,
        };
        assert_eq!(ValidationWindow::from_timestamp(7200), expected);
    }

    #[test]
    fn test_window_from_timestamp_mid_hour() {
        // 5400 seconds is 1.5 hours, which lies inside window 1.
        let expected = ValidationWindow {
            window_id: 1,
            start_secs: 3600,
            end_secs: 7200,
        };
        assert_eq!(ValidationWindow::from_timestamp(5400), expected);
    }

    #[test]
    fn test_window_from_window_id() {
        let expected = ValidationWindow {
            window_id: 100,
            start_secs: 360000,
            end_secs: 363600,
        };
        assert_eq!(ValidationWindow::from_window_id(100), expected);
    }

    #[test]
    fn test_window_contains() {
        let window = ValidationWindow::from_window_id(10);
        let cases = [
            (36000, true),  // start of window
            (37800, true),  // middle
            (39599, true),  // just before end
            (39600, false), // end is exclusive
            (35999, false), // before start
        ];
        for &(ts, expected) in &cases {
            assert_eq!(window.contains(ts), expected, "timestamp {}", ts);
        }
    }

    #[test]
    fn test_window_remaining_secs() {
        let window = ValidationWindow::from_window_id(10);
        let cases = [
            (36000, 3600), // start: full hour left
            (37800, 1800), // middle: 30 minutes left
            (39600, 0),    // end: nothing left
            (40000, 0),    // past the end
        ];
        for &(ts, expected) in &cases {
            assert_eq!(window.remaining_secs(ts), expected, "timestamp {}", ts);
        }
    }

    #[test]
    fn test_rng_deterministic() {
        let window = ValidationWindow::from_window_id(42);

        // Two RNGs from the same window must yield the same shuffle.
        let mut first = make_corpus(20);
        let mut second = first.clone();
        first.shuffle(&mut window.rng());
        second.shuffle(&mut window.rng());

        assert_pairwise_same_text(&first, &second);
    }

    #[test]
    fn test_rng_different_windows_produce_different_sequences() {
        let mut first = make_corpus(20);
        let mut second = first.clone();

        first.shuffle(&mut ValidationWindow::from_window_id(42).rng());
        second.shuffle(&mut ValidationWindow::from_window_id(43).rng());

        // At least one query has to end up in a different position.
        let mut same_count = 0;
        for (a, b) in first.iter().zip(second.iter()) {
            if a.query_text == b.query_text {
                same_count += 1;
            }
        }
        assert!(same_count < 20, "Different windows should produce different orderings");
    }

    #[test]
    fn test_select_test_queries_count() {
        let picked = select_test_queries(100, &make_corpus(50), 10);
        assert_eq!(picked.len(), 10);
    }

    #[test]
    fn test_select_test_queries_count_exceeds_corpus() {
        // Asking for more than exists returns the whole corpus.
        let picked = select_test_queries(100, &make_corpus(5), 10);
        assert_eq!(picked.len(), 5);
    }

    #[test]
    fn test_select_test_queries_empty_corpus() {
        assert!(select_test_queries(100, &[], 10).is_empty());
    }

    #[test]
    fn test_select_test_queries_zero_count() {
        assert!(select_test_queries(100, &make_corpus(10), 0).is_empty());
    }

    #[test]
    fn test_select_test_queries_deterministic() {
        let corpus = make_corpus(30);

        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(42, &corpus, 10);

        assert_eq!(first.len(), second.len());
        assert_pairwise_same_text(&first, &second);
    }

    #[test]
    fn test_select_test_queries_different_windows() {
        let corpus = make_corpus(30);

        let from_42 = select_test_queries(42, &corpus, 10);
        let from_43 = select_test_queries(43, &corpus, 10);

        // Distinct windows should (with high probability) pick differently.
        assert_ne!(texts(&from_42), texts(&from_43));
    }

    #[test]
    fn test_window_boundary_consistency() {
        // Timestamps around the hour boundary land in the expected window.
        let cases = [(3599, 0), (3600, 1), (3601, 1)];
        for &(ts, expected_id) in &cases {
            assert_eq!(ValidationWindow::from_timestamp(ts).window_id, expected_id);
        }
    }

    #[test]
    fn test_window_method_select_queries() {
        let corpus = TestCorpus {
            queries: make_corpus(30),
        };
        let via_method = ValidationWindow::from_window_id(42).select_test_queries(&corpus, 5);
        assert_eq!(via_method.len(), 5);

        // The method must agree with the standalone function.
        let via_function = select_test_queries(42, &corpus.queries, 5);
        assert_pairwise_same_text(&via_method, &via_function);
    }
}