// cp_validator/corpus.rs
//! Test query corpus for validator evaluations.
//!
//! Per CP-015 section 4: Test queries are drawn from a canonical query
//! corpus maintained by the network. The corpus is a set of query strings
//! with known-relevant chunk IDs, stored on Arweave and updated through
//! network consensus.
7
8use cp_arweave::ArweaveClient;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::path::Path;
12use tracing::info;
13use uuid::Uuid;
14
15/// A single test query with ground-truth relevance information.
16///
17/// Per CP-015 section 4: each test query includes the query text,
18/// a pre-computed embedding, the set of known-relevant chunk IDs,
19/// and graded relevance scores.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct TestQuery {
22    /// The query text.
23    pub query_text: String,
24
25    /// Pre-computed query embedding (i16 quantized, variable dimension).
26    /// This is the raw i16 vector as stored in Arweave, matching the
27    /// network's canonical embedding model.
28    pub query_embedding: Vec<i16>,
29
30    /// Ground-truth relevant chunk IDs for this query.
31    pub relevant_chunk_ids: Vec<Uuid>,
32
33    /// Graded relevance scores corresponding to `relevant_chunk_ids`.
34    /// Values 0-3 where 0 = not relevant, 1 = marginally relevant,
35    /// 2 = relevant, 3 = highly relevant.
36    /// Must be the same length as `relevant_chunk_ids`.
37    pub relevance_grades: Vec<u8>,
38}
39
40impl TestQuery {
41    /// Convert the i16 query embedding to the format expected by
42    /// `GraphStore::search` (f32).
43    pub fn query_embedding_f32(&self) -> Vec<f32> {
44        self.query_embedding
45            .iter()
46            .map(|&v| f32::from(v) / 32767.0)
47            .collect()
48    }
49
50    /// Return the i16 query embedding directly.
51    pub fn query_embedding_i16(&self) -> Vec<i16> {
52        self.query_embedding.clone()
53    }
54
55    /// Build a map from chunk ID to relevance grade for NDCG computation.
56    pub fn relevance_grade_map(&self) -> HashMap<Uuid, u8> {
57        self.relevant_chunk_ids
58            .iter()
59            .zip(self.relevance_grades.iter())
60            .map(|(id, &grade)| (*id, grade))
61            .collect()
62    }
63}
64
65/// A collection of test queries forming the canonical evaluation corpus.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct TestCorpus {
68    /// The test queries in this corpus.
69    pub queries: Vec<TestQuery>,
70}
71
72impl TestCorpus {
73    /// Create an empty test corpus.
74    pub fn new() -> Self {
75        Self {
76            queries: Vec::new(),
77        }
78    }
79
80    /// Number of queries in the corpus.
81    pub fn len(&self) -> usize {
82        self.queries.len()
83    }
84
85    /// Whether the corpus is empty.
86    pub fn is_empty(&self) -> bool {
87        self.queries.is_empty()
88    }
89
90    /// Load a test corpus from a file on disk.
91    ///
92    /// Tries CBOR first, then JSON as fallback.
93    pub fn load_from_file(path: &Path) -> Result<Self, crate::ValidatorError> {
94        let bytes = std::fs::read(path).map_err(|e| {
95            crate::ValidatorError::Corpus(format!(
96                "Failed to read corpus file {}: {}",
97                path.display(),
98                e
99            ))
100        })?;
101
102        // Try CBOR first, then JSON as fallback
103        if let Ok(corpus) = ciborium::from_reader::<TestCorpus, _>(bytes.as_slice()) {
104            info!(queries = corpus.queries.len(), path = %path.display(), "Loaded corpus from CBOR");
105            return Ok(corpus);
106        }
107
108        let corpus: TestCorpus = serde_json::from_slice(&bytes).map_err(|e| {
109            crate::ValidatorError::Corpus(format!(
110                "Failed to parse corpus file {} (tried CBOR and JSON): {}",
111                path.display(),
112                e
113            ))
114        })?;
115
116        info!(queries = corpus.queries.len(), path = %path.display(), "Loaded corpus from JSON");
117        Ok(corpus)
118    }
119
120    /// Save the corpus to a CBOR file on disk.
121    pub fn save_to_file(&self, path: &Path) -> Result<(), crate::ValidatorError> {
122        let mut buf = Vec::new();
123        ciborium::into_writer(self, &mut buf).map_err(|e| {
124            crate::ValidatorError::Corpus(format!("Failed to serialize corpus: {e}"))
125        })?;
126
127        std::fs::write(path, &buf).map_err(|e| {
128            crate::ValidatorError::Corpus(format!(
129                "Failed to write corpus file {}: {}",
130                path.display(),
131                e
132            ))
133        })?;
134
135        info!(queries = self.queries.len(), path = %path.display(), "Saved corpus to CBOR");
136        Ok(())
137    }
138
139    /// Load the canonical test corpus from Arweave.
140    ///
141    /// Queries Arweave for the latest corpus transaction tagged with
142    /// Content-Type: application/x-canon-test-corpus, downloads and
143    /// deserializes it.
144    pub async fn load_from_arweave(arweave: &ArweaveClient) -> Result<Self, crate::ValidatorError> {
145        info!("Loading test corpus from Arweave");
146
147        // Query for corpus transactions
148        let tags = vec![cp_arweave::TagFilter::new(
149            "Content-Type",
150            &["application/x-canon-test-corpus"],
151        )];
152
153        let result = arweave
154            .query_transactions(&tags, 1, None, cp_arweave::SortOrder::HeightDesc)
155            .await
156            .map_err(|e| crate::ValidatorError::Arweave(format!("Failed to query corpus: {e}")))?;
157
158        if result.edges.is_empty() {
159            return Err(crate::ValidatorError::Corpus(
160                "No test corpus found on Arweave".to_string(),
161            ));
162        }
163
164        let tx_id = &result.edges[0].node.id;
165        info!(tx_id = tx_id, "Found corpus transaction");
166
167        let data = arweave.get_transaction_data(tx_id).await.map_err(|e| {
168            crate::ValidatorError::Arweave(format!("Failed to download corpus: {e}"))
169        })?;
170
171        // Decompress if needed (try zstd first, then raw CBOR)
172        let corpus_bytes = match zstd::decode_all(data.as_slice()) {
173            Ok(decompressed) => decompressed,
174            Err(_) => data,
175        };
176
177        let corpus: TestCorpus = ciborium::from_reader(corpus_bytes.as_slice()).map_err(|e| {
178            crate::ValidatorError::Corpus(format!("Failed to parse corpus from Arweave: {e}"))
179        })?;
180
181        info!(queries = corpus.queries.len(), "Loaded corpus from Arweave");
182        Ok(corpus)
183    }
184}
185
186impl Default for TestCorpus {
187    fn default() -> Self {
188        Self::new()
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a deterministic fixture query derived from `i`: a
    /// 10-component embedding filled with `i`, and two relevant chunks
    /// whose IDs are 16 bytes of `i` and of `i + 1`, graded 3 and 1.
    fn sample_query(i: usize) -> TestQuery {
        let first_id = Uuid::from_bytes([i as u8; 16]);
        let second_id = Uuid::from_bytes([(i + 1) as u8; 16]);

        TestQuery {
            query_text: format!("What is topic {i}?"),
            query_embedding: vec![i as i16; 10],
            relevant_chunk_ids: vec![first_id, second_id],
            relevance_grades: vec![3, 1],
        }
    }

    /// The grade map pairs each chunk ID with the grade at the same index.
    #[test]
    fn test_test_query_relevance_grade_map() {
        let grades = sample_query(0).relevance_grade_map();

        assert_eq!(grades.len(), 2);
        assert_eq!(grades.get(&Uuid::from_bytes([0u8; 16])), Some(&3));
        assert_eq!(grades.get(&Uuid::from_bytes([1u8; 16])), Some(&1));
    }

    /// The i16 accessor hands back the stored vector unchanged.
    #[test]
    fn test_test_query_embedding_i16() {
        let raw = sample_query(5).query_embedding_i16();
        assert_eq!(raw, vec![5i16; 10]);
    }

    /// The f32 accessor scales every component by 1 / 32767.
    #[test]
    fn test_test_query_embedding_f32() {
        let scaled = sample_query(5).query_embedding_f32();
        assert_eq!(scaled.len(), 10);

        let expected: f32 = 5.0 / 32767.0;
        assert!((scaled[0] - expected).abs() < 1e-6);
    }

    /// A freshly created corpus holds no queries.
    #[test]
    fn test_corpus_new_empty() {
        let corpus = TestCorpus::new();
        assert_eq!(corpus.len(), 0);
        assert!(corpus.is_empty());
    }

    /// `len` and `is_empty` reflect the number of queries supplied.
    #[test]
    fn test_corpus_with_queries() {
        let queries: Vec<TestQuery> = (0..10).map(sample_query).collect();
        let corpus = TestCorpus { queries };

        assert_eq!(corpus.len(), 10);
        assert!(!corpus.is_empty());
    }

    /// Saving to CBOR and loading again yields the same queries.
    #[test]
    fn test_corpus_save_load_cbor_roundtrip() {
        let original = TestCorpus {
            queries: (0..5).map(sample_query).collect(),
        };

        let tmp = tempfile::NamedTempFile::new().unwrap();
        let path = tmp.path();

        original.save_to_file(path).unwrap();
        let reloaded = TestCorpus::load_from_file(path).unwrap();

        assert_eq!(reloaded.len(), original.len());
        for (got, want) in reloaded.queries.iter().zip(&original.queries) {
            assert_eq!(got.query_text, want.query_text);
            assert_eq!(got.query_embedding, want.query_embedding);
            assert_eq!(got.relevant_chunk_ids, want.relevant_chunk_ids);
            assert_eq!(got.relevance_grades, want.relevance_grades);
        }
    }

    /// A JSON-encoded corpus file is accepted through the fallback path.
    #[test]
    fn test_corpus_load_json() {
        let original = TestCorpus {
            queries: vec![sample_query(0)],
        };
        let encoded = serde_json::to_vec(&original).unwrap();

        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), &encoded).unwrap();

        let reloaded = TestCorpus::load_from_file(tmp.path()).unwrap();
        assert_eq!(reloaded.len(), 1);
        assert_eq!(reloaded.queries[0].query_text, "What is topic 0?");
    }

    /// Loading from a path that does not exist reports an error.
    #[test]
    fn test_corpus_load_nonexistent() {
        let missing = Path::new("/nonexistent/corpus.cbor");
        assert!(TestCorpus::load_from_file(missing).is_err());
    }

    /// An in-memory CBOR encode/decode keeps the number of queries.
    #[test]
    fn test_corpus_cbor_serialization() {
        let original = TestCorpus {
            queries: (0..3).map(sample_query).collect(),
        };

        let mut encoded = Vec::new();
        ciborium::into_writer(&original, &mut encoded).unwrap();
        let decoded: TestCorpus = ciborium::from_reader(encoded.as_slice()).unwrap();

        assert_eq!(decoded.len(), 3);
    }

    /// A single query survives a JSON string round trip.
    #[test]
    fn test_test_query_serialization() {
        let original = sample_query(42);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: TestQuery = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.query_text, original.query_text);
        assert_eq!(decoded.relevant_chunk_ids, original.relevant_chunk_ids);
    }
}