1use crate::corpus::{TestCorpus, TestQuery};
9use rand::seq::SliceRandom;
10use rand::SeedableRng;
11use rand_chacha::ChaCha20Rng;
12use serde::{Deserialize, Serialize};
13
14const WINDOW_DURATION_SECS: u64 = 3600; #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct ValidationWindow {
23 pub window_id: u64,
25 pub start_secs: u64,
27 pub end_secs: u64,
29}
30
31impl ValidationWindow {
32 pub fn current() -> Self {
34 let now_secs = std::time::SystemTime::now()
35 .duration_since(std::time::UNIX_EPOCH)
36 .unwrap()
37 .as_secs();
38 Self::from_timestamp(now_secs)
39 }
40
41 pub fn from_timestamp(unix_secs: u64) -> Self {
43 let window_id = unix_secs / WINDOW_DURATION_SECS;
44 let start_secs = window_id * WINDOW_DURATION_SECS;
45 let end_secs = start_secs + WINDOW_DURATION_SECS;
46
47 Self {
48 window_id,
49 start_secs,
50 end_secs,
51 }
52 }
53
54 pub fn from_window_id(window_id: u64) -> Self {
56 let start_secs = window_id * WINDOW_DURATION_SECS;
57 let end_secs = start_secs + WINDOW_DURATION_SECS;
58
59 Self {
60 window_id,
61 start_secs,
62 end_secs,
63 }
64 }
65
66 pub fn rng(&self) -> ChaCha20Rng {
71 let seed_hash = blake3::hash(&self.window_id.to_le_bytes());
72 let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
73 ChaCha20Rng::from_seed(seed_bytes)
74 }
75
76 pub fn contains(&self, unix_secs: u64) -> bool {
78 unix_secs >= self.start_secs && unix_secs < self.end_secs
79 }
80
81 pub fn remaining_secs(&self, unix_secs: u64) -> u64 {
83 self.end_secs.saturating_sub(unix_secs)
84 }
85
86 pub fn select_test_queries(&self, corpus: &TestCorpus, count: usize) -> Vec<TestQuery> {
91 select_test_queries(self.window_id, &corpus.queries, count)
92 }
93}
94
95pub fn select_test_queries(
102 window_id: u64,
103 query_corpus: &[TestQuery],
104 count: usize,
105) -> Vec<TestQuery> {
106 if query_corpus.is_empty() || count == 0 {
107 return Vec::new();
108 }
109
110 let seed_hash = blake3::hash(&window_id.to_le_bytes());
111 let seed_bytes: [u8; 32] = *seed_hash.as_bytes();
112 let mut rng = ChaCha20Rng::from_seed(seed_bytes);
113
114 let mut shuffled: Vec<TestQuery> = query_corpus.to_vec();
115 shuffled.shuffle(&mut rng);
116
117 shuffled.truncate(count);
118 shuffled
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use uuid::Uuid;

    /// Builds `n` synthetic queries whose text, embedding and chunk id are
    /// derived from their index.
    fn make_corpus(n: usize) -> Vec<TestQuery> {
        let mut queries = Vec::with_capacity(n);
        for i in 0..n {
            queries.push(TestQuery {
                query_text: format!("test query {i}"),
                query_embedding: vec![i as i16; 10],
                relevant_chunk_ids: vec![Uuid::from_bytes([i as u8; 16])],
                relevance_grades: vec![1],
            });
        }
        queries
    }

    /// Asserts that both slices carry the same `query_text` at every position
    /// they have in common.
    fn assert_same_texts(left: &[TestQuery], right: &[TestQuery]) {
        for (l, r) in left.iter().zip(right) {
            assert_eq!(l.query_text, r.query_text);
        }
    }

    // A timestamp exactly on a window start maps to that window.
    #[test]
    fn test_window_from_timestamp() {
        let window = ValidationWindow::from_timestamp(7200);
        let expected = ValidationWindow {
            window_id: 2,
            start_secs: 7200,
            end_secs: 10800,
        };
        assert_eq!(window, expected);
    }

    // A timestamp in the middle of a window maps to the enclosing window.
    #[test]
    fn test_window_from_timestamp_mid_hour() {
        let window = ValidationWindow::from_timestamp(5400);
        let expected = ValidationWindow {
            window_id: 1,
            start_secs: 3600,
            end_secs: 7200,
        };
        assert_eq!(window, expected);
    }

    #[test]
    fn test_window_from_window_id() {
        let window = ValidationWindow::from_window_id(100);
        let expected = ValidationWindow {
            window_id: 100,
            start_secs: 360000,
            end_secs: 363600,
        };
        assert_eq!(window, expected);
    }

    // The start bound is inclusive, the end bound is exclusive.
    #[test]
    fn test_window_contains() {
        let window = ValidationWindow::from_window_id(10);
        for inside in [36000, 37800, 39599] {
            assert!(window.contains(inside));
        }
        for outside in [39600, 35999] {
            assert!(!window.contains(outside));
        }
    }

    // Remaining time counts down to the end bound and stays at zero afterwards.
    #[test]
    fn test_window_remaining_secs() {
        let window = ValidationWindow::from_window_id(10);
        let cases = [(36000, 3600), (37800, 1800), (39600, 0), (40000, 0)];
        for (now, expected) in cases {
            assert_eq!(window.remaining_secs(now), expected);
        }
    }

    // Two generators obtained from the same window shuffle identically.
    #[test]
    fn test_rng_deterministic() {
        let window = ValidationWindow::from_window_id(42);
        let mut first = make_corpus(20);
        let mut second = first.clone();

        first.shuffle(&mut window.rng());
        second.shuffle(&mut window.rng());

        assert_same_texts(&first, &second);
    }

    #[test]
    fn test_rng_different_windows_produce_different_sequences() {
        let mut first = make_corpus(20);
        let mut second = first.clone();

        first.shuffle(&mut ValidationWindow::from_window_id(42).rng());
        second.shuffle(&mut ValidationWindow::from_window_id(43).rng());

        let matching_positions = first
            .iter()
            .zip(&second)
            .filter(|(a, b)| a.query_text == b.query_text)
            .count();
        assert!(
            matching_positions < 20,
            "Different windows should produce different orderings"
        );
    }

    #[test]
    fn test_select_test_queries_count() {
        let picked = select_test_queries(100, &make_corpus(50), 10);
        assert_eq!(picked.len(), 10);
    }

    // Asking for more queries than exist returns the whole corpus.
    #[test]
    fn test_select_test_queries_count_exceeds_corpus() {
        let picked = select_test_queries(100, &make_corpus(5), 10);
        assert_eq!(picked.len(), 5);
    }

    #[test]
    fn test_select_test_queries_empty_corpus() {
        assert!(select_test_queries(100, &[], 10).is_empty());
    }

    #[test]
    fn test_select_test_queries_zero_count() {
        assert!(select_test_queries(100, &make_corpus(10), 0).is_empty());
    }

    #[test]
    fn test_select_test_queries_deterministic() {
        let corpus = make_corpus(30);
        let first = select_test_queries(42, &corpus, 10);
        let second = select_test_queries(42, &corpus, 10);

        assert_eq!(first.len(), second.len());
        assert_same_texts(&first, &second);
    }

    #[test]
    fn test_select_test_queries_different_windows() {
        let corpus = make_corpus(30);
        let texts_for = |window_id: u64| -> Vec<String> {
            select_test_queries(window_id, &corpus, 10)
                .into_iter()
                .map(|query| query.query_text)
                .collect()
        };

        assert_ne!(texts_for(42), texts_for(43));
    }

    // The window index changes exactly at a multiple of the window length.
    #[test]
    fn test_window_boundary_consistency() {
        for (timestamp, expected_id) in [(3599, 0), (3600, 1), (3601, 1)] {
            let window = ValidationWindow::from_timestamp(timestamp);
            assert_eq!(window.window_id, expected_id);
        }
    }

    // The method form and the free function agree on the selection.
    #[test]
    fn test_window_method_select_queries() {
        let corpus = TestCorpus {
            queries: make_corpus(30),
        };
        let via_method = ValidationWindow::from_window_id(42).select_test_queries(&corpus, 5);
        assert_eq!(via_method.len(), 5);

        let via_function = select_test_queries(42, &corpus.queries, 5);
        assert_same_texts(&via_method, &via_function);
    }
}