1use crate::corpus::{TestCorpus, TestQuery};
9use rand::seq::SliceRandom;
10use rand::SeedableRng;
11use rand_chacha::ChaCha20Rng;
12use serde::{Deserialize, Serialize};
13
14const WINDOW_DURATION_SECS: u64 = 3600; #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct ValidationWindow {
23 pub window_id: u64,
25 pub start_secs: u64,
27 pub end_secs: u64,
29}
30
31impl ValidationWindow {
32 pub fn current() -> Self {
34 let now_secs = std::time::SystemTime::now()
35 .duration_since(std::time::UNIX_EPOCH)
36 .unwrap()
37 .as_secs();
38 Self::from_timestamp(now_secs)
39 }
40
41 pub fn from_timestamp(unix_secs: u64) -> Self {
43 let window_id = unix_secs / WINDOW_DURATION_SECS;
44 let start_secs = window_id * WINDOW_DURATION_SECS;
45 let end_secs = start_secs + WINDOW_DURATION_SECS;
46
47 Self {
48 window_id,
49 start_secs,
50 end_secs,
51 }
52 }
53
54 pub fn from_window_id(window_id: u64) -> Self {
56 let start_secs = window_id * WINDOW_DURATION_SECS;
57 let end_secs = start_secs + WINDOW_DURATION_SECS;
58
59 Self {
60 window_id,
61 start_secs,
62 end_secs,
63 }
64 }
65
66 pub fn rng(&self) -> ChaCha20Rng {
71 let seed_hash = blake3::hash(&self.window_id.to_le_bytes());
72 let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
73 ChaCha20Rng::from_seed(seed_bytes)
74 }
75
76 pub fn contains(&self, unix_secs: u64) -> bool {
78 unix_secs >= self.start_secs && unix_secs < self.end_secs
79 }
80
81 pub fn remaining_secs(&self, unix_secs: u64) -> u64 {
83 if unix_secs >= self.end_secs {
84 0
85 } else {
86 self.end_secs - unix_secs
87 }
88 }
89
90 pub fn select_test_queries(
95 &self,
96 corpus: &TestCorpus,
97 count: usize,
98 ) -> Vec<TestQuery> {
99 select_test_queries(self.window_id, &corpus.queries, count)
100 }
101}
102
103pub fn select_test_queries(
110 window_id: u64,
111 query_corpus: &[TestQuery],
112 count: usize,
113) -> Vec<TestQuery> {
114 if query_corpus.is_empty() || count == 0 {
115 return Vec::new();
116 }
117
118 let seed_hash = blake3::hash(&window_id.to_le_bytes());
119 let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
120 let mut rng = ChaCha20Rng::from_seed(seed_bytes);
121
122 let mut shuffled: Vec<TestQuery> = query_corpus.to_vec();
123 shuffled.shuffle(&mut rng);
124
125 shuffled.truncate(count);
126 shuffled
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use uuid::Uuid;

    /// Builds `n` queries whose text, embedding and chunk id derive from the index.
    fn make_corpus(n: usize) -> Vec<TestQuery> {
        let mut queries = Vec::with_capacity(n);
        for i in 0..n {
            queries.push(TestQuery {
                query_text: format!("test query {}", i),
                query_embedding: vec![i as i16; 10],
                relevant_chunk_ids: vec![Uuid::from_bytes([i as u8; 16])],
                relevance_grades: vec![1],
            });
        }
        queries
    }

    /// Asserts that the two slices carry the same query text at every paired position.
    fn assert_same_texts(left: &[TestQuery], right: &[TestQuery]) {
        for (a, b) in left.iter().zip(right.iter()) {
            assert_eq!(a.query_text, b.query_text);
        }
    }

    /// Shuffles a fresh 20-element corpus with the generator of window `window_id`.
    fn shuffled_for_window(window_id: u64) -> Vec<TestQuery> {
        let mut generator = ValidationWindow::from_window_id(window_id).rng();
        let mut corpus = make_corpus(20);
        corpus.shuffle(&mut generator);
        corpus
    }

    #[test]
    fn test_window_from_timestamp() {
        let window = ValidationWindow::from_timestamp(7200);
        assert_eq!(window.window_id, 2);
        assert_eq!(window.start_secs, 7200);
        assert_eq!(window.end_secs, 10800);
    }

    #[test]
    fn test_window_from_timestamp_mid_hour() {
        let window = ValidationWindow::from_timestamp(5400);
        assert_eq!(window.window_id, 1);
        assert_eq!(window.start_secs, 3600);
        assert_eq!(window.end_secs, 7200);
    }

    #[test]
    fn test_window_from_window_id() {
        let window = ValidationWindow::from_window_id(100);
        assert_eq!(window.window_id, 100);
        assert_eq!(window.start_secs, 360000);
        assert_eq!(window.end_secs, 363600);
    }

    #[test]
    fn test_window_contains() {
        let window = ValidationWindow::from_window_id(10);
        // (timestamp, expected membership) around both edges of [36000, 39600).
        let cases: [(u64, bool); 5] = [
            (36000, true),
            (37800, true),
            (39599, true),
            (39600, false),
            (35999, false),
        ];
        for &(timestamp, expected) in cases.iter() {
            assert_eq!(window.contains(timestamp), expected);
        }
    }

    #[test]
    fn test_window_remaining_secs() {
        let window = ValidationWindow::from_window_id(10);
        // (timestamp, expected seconds left) including points at and past the end.
        let cases: [(u64, u64); 4] = [(36000, 3600), (37800, 1800), (39600, 0), (40000, 0)];
        for &(timestamp, expected) in cases.iter() {
            assert_eq!(window.remaining_secs(timestamp), expected);
        }
    }

    #[test]
    fn test_rng_deterministic() {
        let first = shuffled_for_window(42);
        let second = shuffled_for_window(42);

        assert_same_texts(&first, &second);
    }

    #[test]
    fn test_rng_different_windows_produce_different_sequences() {
        let first = shuffled_for_window(42);
        let second = shuffled_for_window(43);

        let mut same_count = 0;
        for (a, b) in first.iter().zip(second.iter()) {
            if a.query_text == b.query_text {
                same_count += 1;
            }
        }
        assert!(same_count < 20, "Different windows should produce different orderings");
    }

    #[test]
    fn test_select_test_queries_count() {
        let picked = select_test_queries(100, &make_corpus(50), 10);
        assert_eq!(picked.len(), 10);
    }

    #[test]
    fn test_select_test_queries_count_exceeds_corpus() {
        let picked = select_test_queries(100, &make_corpus(5), 10);
        assert_eq!(picked.len(), 5);
    }

    #[test]
    fn test_select_test_queries_empty_corpus() {
        let picked = select_test_queries(100, &[], 10);
        assert!(picked.is_empty());
    }

    #[test]
    fn test_select_test_queries_zero_count() {
        let picked = select_test_queries(100, &make_corpus(10), 0);
        assert!(picked.is_empty());
    }

    #[test]
    fn test_select_test_queries_deterministic() {
        let corpus = make_corpus(30);

        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(42, &corpus, 10);

        assert_eq!(first.len(), second.len());
        assert_same_texts(&first, &second);
    }

    #[test]
    fn test_select_test_queries_different_windows() {
        let corpus = make_corpus(30);

        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(43, &corpus, 10);

        let first_texts: Vec<&str> = first.iter().map(|q| q.query_text.as_str()).collect();
        let second_texts: Vec<&str> = second.iter().map(|q| q.query_text.as_str()).collect();
        assert_ne!(first_texts, second_texts);
    }

    #[test]
    fn test_window_boundary_consistency() {
        // One second before, exactly at, and one second after the first boundary.
        let cases: [(u64, u64); 3] = [(3599, 0), (3600, 1), (3601, 1)];
        for &(timestamp, expected_id) in cases.iter() {
            assert_eq!(ValidationWindow::from_timestamp(timestamp).window_id, expected_id);
        }
    }

    #[test]
    fn test_window_method_select_queries() {
        let corpus = TestCorpus {
            queries: make_corpus(30),
        };
        let via_method = ValidationWindow::from_window_id(42).select_test_queries(&corpus, 5);
        assert_eq!(via_method.len(), 5);

        // The method must agree with the free function for the same window id.
        let via_function = select_test_queries(42, &corpus.queries, 5);
        assert_same_texts(&via_method, &via_function);
    }
}