1use crate::corpus::TestQuery;
11use crate::rating::Rating;
12use crate::window::ValidationWindow;
13use cp_arweave::ArweaveClient;
14use cp_graph::GraphStore;
15use cp_tor::types::{PeerRegistration, SearchResponse, SearchStatus};
16use ed25519_dalek::{Signer, SigningKey};
17use serde::{Deserialize, Serialize};
18use serde_big_array::BigArray;
19use std::collections::HashMap;
20use std::time::Instant;
21use tracing::{debug, info, warn};
22use uuid::Uuid;
23
/// Retrieval-quality and integrity measurements collected for one contributor.
///
/// Filled in by `evaluate_contributor` and reduced to a single number by
/// `composite_score`. The struct is also part of the signed payload of
/// `EvaluationResult`, so its serialized form is security relevant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    /// Precision@10, averaged over the test queries whose search succeeded.
    pub precision_at_10: f32,
    /// NDCG@10, averaged over the test queries whose search succeeded.
    pub ndcg_at_10: f32,
    /// Mean reciprocal rank over the test queries whose search succeeded.
    pub mrr: f32,
    /// Number of documents whose recomputed hierarchical hash matched the
    /// stored one.
    pub merkle_proofs_valid: u32,
    /// Number of documents that failed the hash check (all-zero hash, no
    /// chunks, or a mismatch).
    pub merkle_proofs_invalid: u32,
    /// Optional adapter perplexity delta. `composite_score` multiplies the
    /// score by 0.8 when this is greater than zero; `evaluate_contributor`
    /// always leaves it as `None`.
    pub adapter_perplexity_delta: Option<f32>,
}
42
/// Signed outcome of evaluating one contributor during one validation window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluationResult {
    /// Key identifying the contributor that was evaluated.
    pub contributor_key: [u8; 32],
    /// Node id of the validator that produced this result.
    ///
    /// NOTE(review): this field is not part of `signing_bytes()`, so the
    /// signature does not bind the result to a validator id. Confirm that
    /// this is intended.
    pub validator_node_id: [u8; 16],
    /// Identifier of the validation window the evaluation belongs to.
    pub window_id: u64,
    /// Measurements gathered during the evaluation.
    pub metrics: QualityMetrics,
    /// Creation time; `evaluate_contributor` stores milliseconds since the
    /// Unix epoch.
    pub timestamp: i64,
    /// Signature over `signing_bytes()`, produced with the validator's
    /// `SigningKey`. Serialized through `serde_big_array::BigArray`.
    #[serde(with = "BigArray")]
    pub signature: [u8; 64],
}
62
63impl EvaluationResult {
64 pub fn signing_bytes(&self) -> [u8; 32] {
66 let signable = EvaluationResultSignable {
67 contributor_key: &self.contributor_key,
68 window_id: self.window_id,
69 metrics: &self.metrics,
70 timestamp: self.timestamp,
71 };
72 let mut buf = Vec::new();
73 ciborium::into_writer(&signable, &mut buf).expect("CBOR serialization cannot fail");
74 *blake3::hash(&buf).as_bytes()
75 }
76}
77
/// Borrowed view of the `EvaluationResult` fields that are covered by the
/// signature.
///
/// The derived `Serialize` output of this struct is what
/// `EvaluationResult::signing_bytes` encodes and hashes, so treat the field
/// names, types and order as part of the signed format.
#[derive(Serialize)]
struct EvaluationResultSignable<'a> {
    /// Contributor the result refers to.
    contributor_key: &'a [u8; 32],
    /// Validation window of the result.
    window_id: u64,
    /// Measurements being attested.
    metrics: &'a QualityMetrics,
    /// Timestamp copied from the result.
    timestamp: i64,
}
85
/// Measurements gathered while sending test queries to a live peer.
///
/// Filled in by `evaluate_peer` and reduced to a single number by
/// `peer_composite_score`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerQualityMetrics {
    /// Precision@10, averaged over the queries the peer answered successfully.
    pub precision_at_10: f32,
    /// NDCG@10, averaged over the queries the peer answered successfully.
    pub ndcg_at_10: f32,
    /// Mean reciprocal rank over the queries the peer answered successfully.
    pub mrr: f32,
    /// Mean wall-clock time of the successful queries in milliseconds,
    /// saturated at `u16::MAX`; `0` when no query succeeded.
    pub response_latency_ms: u16,
    /// Fraction of the Merkle proofs attached to results that verified
    /// against the peer's reported state root; `0.0` when no proof was
    /// returned.
    pub proof_validity_rate: f32,
    /// Availability of the peer during the test, in `[0, 1]`; `0.0` when no
    /// query succeeded. `peer_composite_score` multiplies it with
    /// `proof_validity_rate`.
    pub availability_rate: f32,
}
104
/// Signed outcome of testing one live peer during one validation window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerTestResult {
    /// Node id of the peer that was tested.
    pub peer_node_id: [u8; 16],
    /// Onion address the test queries were sent to.
    pub peer_onion: String,
    /// Node id of the validator that produced this result.
    ///
    /// NOTE(review): this field is not part of `signing_bytes()`, so the
    /// signature does not bind the result to a validator id. Confirm that
    /// this is intended.
    pub validator_node_id: [u8; 16],
    /// Identifier of the validation window the test belongs to.
    pub window_id: u64,
    /// Measurements gathered during the test.
    pub metrics: PeerQualityMetrics,
    /// Creation time; `evaluate_peer` stores milliseconds since the Unix
    /// epoch.
    pub timestamp: i64,
    /// Signature over `signing_bytes()`, produced with the validator's
    /// `SigningKey`. Serialized through `serde_big_array::BigArray`.
    #[serde(with = "BigArray")]
    pub signature: [u8; 64],
}
126
127impl PeerTestResult {
128 pub fn signing_bytes(&self) -> [u8; 32] {
130 let signable = PeerTestResultSignable {
131 peer_node_id: &self.peer_node_id,
132 peer_onion: &self.peer_onion,
133 window_id: self.window_id,
134 metrics: &self.metrics,
135 timestamp: self.timestamp,
136 };
137 let mut buf = Vec::new();
138 ciborium::into_writer(&signable, &mut buf).expect("CBOR serialization cannot fail");
139 *blake3::hash(&buf).as_bytes()
140 }
141}
142
/// Borrowed view of the `PeerTestResult` fields that are covered by the
/// signature.
///
/// The derived `Serialize` output of this struct is what
/// `PeerTestResult::signing_bytes` encodes and hashes, so treat the field
/// names, types and order as part of the signed format.
#[derive(Serialize)]
struct PeerTestResultSignable<'a> {
    /// Node id of the tested peer.
    peer_node_id: &'a [u8; 16],
    /// Onion address of the tested peer.
    peer_onion: &'a str,
    /// Validation window of the result.
    window_id: u64,
    /// Measurements being attested.
    metrics: &'a PeerQualityMetrics,
    /// Timestamp copied from the result.
    timestamp: i64,
}
151
152pub fn precision_at_k(returned: &[Uuid], relevant: &[Uuid], k: usize) -> f32 {
163 if k == 0 || returned.is_empty() {
164 return 0.0;
165 }
166
167 let take = returned.len().min(k);
168 let relevant_count = returned[..take]
169 .iter()
170 .filter(|id| relevant.contains(id))
171 .count();
172
173 relevant_count as f32 / k as f32
174}
175
176pub fn ndcg_at_k(
191 returned: &[Uuid],
192 relevant: &[Uuid],
193 relevance_grades: &HashMap<Uuid, u8>,
194 k: usize,
195) -> f32 {
196 if k == 0 || returned.is_empty() || relevant.is_empty() {
197 return 0.0;
198 }
199
200 let take = returned.len().min(k);
201
202 let dcg = compute_dcg(&returned[..take], relevant, relevance_grades);
204
205 let mut ideal_grades: Vec<u8> = relevant
207 .iter()
208 .map(|id| *relevance_grades.get(id).unwrap_or(&1))
209 .collect();
210 ideal_grades.sort_unstable_by(|a, b| b.cmp(a));
211 ideal_grades.truncate(k);
212
213 let idcg: f64 = ideal_grades
214 .iter()
215 .enumerate()
216 .map(|(i, &grade)| {
217 let gain = (2.0_f64).powi(grade as i32) - 1.0;
218 gain / (i as f64 + 2.0).log2()
219 })
220 .sum();
221
222 if idcg == 0.0 {
223 return 0.0;
224 }
225
226 (dcg / idcg) as f32
227}
228
229fn compute_dcg(
231 ranked: &[Uuid],
232 relevant: &[Uuid],
233 relevance_grades: &HashMap<Uuid, u8>,
234) -> f64 {
235 ranked
236 .iter()
237 .enumerate()
238 .map(|(i, id)| {
239 let grade = if let Some(&g) = relevance_grades.get(id) {
240 g
241 } else if relevant.contains(id) {
242 1
243 } else {
244 0
245 };
246 let gain = (2.0_f64).powi(grade as i32) - 1.0;
247 gain / (i as f64 + 2.0).log2()
248 })
249 .sum()
250}
251
252pub fn mrr(returned: &[Uuid], relevant: &[Uuid]) -> f32 {
257 for (i, id) in returned.iter().enumerate() {
258 if relevant.contains(id) {
259 return 1.0 / (i as f32 + 1.0);
260 }
261 }
262 0.0
263}
264
265pub fn composite_score(metrics: &QualityMetrics) -> f32 {
272 let proof_rate = if metrics.merkle_proofs_valid + metrics.merkle_proofs_invalid > 0 {
273 metrics.merkle_proofs_valid as f32
274 / (metrics.merkle_proofs_valid + metrics.merkle_proofs_invalid) as f32
275 } else {
276 0.0
277 };
278
279 let mut score = 0.4 * metrics.ndcg_at_10
280 + 0.3 * metrics.precision_at_10
281 + 0.2 * metrics.mrr
282 + 0.1 * proof_rate;
283
284 if let Some(delta) = metrics.adapter_perplexity_delta {
286 if delta > 0.0 {
287 score *= 0.8;
288 }
289 }
290
291 score
292}
293
294pub fn peer_composite_score(metrics: &PeerQualityMetrics) -> f32 {
299 let quality = 0.4 * metrics.ndcg_at_10
300 + 0.2 * metrics.precision_at_10
301 + 0.1 * metrics.mrr;
302
303 let reliability = metrics.proof_validity_rate * metrics.availability_rate;
304
305 let latency_score = (1.0 - metrics.response_latency_ms as f32 / 5000.0).max(0.0);
307
308 0.7 * quality + 0.2 * reliability + 0.1 * latency_score
309}
310
311pub async fn evaluate_contributor(
316 contributor_key: [u8; 32],
317 arweave: &ArweaveClient,
318 test_queries: &[TestQuery],
319 validator_node_id: [u8; 16],
320 signing_key: &SigningKey,
321) -> Result<EvaluationResult, crate::ValidatorError> {
322 info!(
323 contributor = hex::encode(contributor_key),
324 queries = test_queries.len(),
325 "Evaluating contributor"
326 );
327
328 let contributor_hex = hex::encode(contributor_key);
330 let (discovered, _cursor) =
331 cp_arweave::discover_diffs_by_contributor(arweave, &contributor_hex, 50, None)
332 .await
333 .map_err(|e| crate::ValidatorError::Arweave(e.to_string()))?;
334
335 if discovered.is_empty() {
336 return Err(crate::ValidatorError::NoContent(contributor_hex));
337 }
338
339 let temp_dir = tempfile::tempdir()
341 .map_err(|e| crate::ValidatorError::Internal(format!("Failed to create temp dir: {}", e)))?;
342 let db_path = temp_dir.path().join("eval.db");
343 let db_path_str = db_path.to_string_lossy().to_string();
344
345 let mut local_index = GraphStore::open(&db_path_str)
346 .map_err(|e| crate::ValidatorError::Internal(format!("Failed to open graph store: {}", e)))?;
347
348 let mut downloaded_count = 0u64;
349 for diff_meta in &discovered {
350 match cp_arweave::download_diff(arweave, diff_meta).await {
351 Ok(downloaded) => {
352 for doc in &downloaded.diff.added_docs {
353 let _ = local_index.insert_document(doc);
354 }
355 for chunk in &downloaded.diff.added_chunks {
356 let _ = local_index.insert_chunk(chunk);
357 }
358 for emb in &downloaded.diff.added_embeddings {
359 let _ = local_index.insert_embedding(emb);
360 }
361 for edge in &downloaded.diff.added_edges {
362 let _ = local_index.add_edge(edge);
363 }
364 downloaded_count += 1;
365 }
366 Err(e) => {
367 warn!(tx_id = diff_meta.tx_id, error = %e, "Failed to download diff");
368 }
369 }
370 }
371
372 if downloaded_count == 0 {
373 return Err(crate::ValidatorError::NoContent(contributor_hex));
374 }
375
376 info!(
377 contributor = hex::encode(contributor_key),
378 diffs = downloaded_count,
379 "Indexed contributor content"
380 );
381
382 let mut precision_sum = 0.0f32;
384 let mut ndcg_sum = 0.0f32;
385 let mut mrr_sum = 0.0f32;
386 let mut query_count = 0u32;
387
388 for query in test_queries {
389 let query_f32: Vec<f32> = query.query_embedding.iter().map(|&v| v as f32 / 32767.0).collect();
391
392 let results = local_index.search(&query_f32, 10);
393
394 match results {
395 Ok(result_ids) => {
396 let returned_uuids: Vec<Uuid> = result_ids
397 .iter()
398 .map(|(id, _score)| *id)
399 .collect();
400
401 let relevant_uuids: Vec<Uuid> = query.relevant_chunk_ids.clone();
402
403 precision_sum += precision_at_k(&returned_uuids, &relevant_uuids, 10);
404 ndcg_sum += ndcg_at_k(
405 &returned_uuids,
406 &relevant_uuids,
407 &query.relevance_grade_map(),
408 10,
409 );
410 mrr_sum += mrr(&returned_uuids, &relevant_uuids);
411 query_count += 1;
412 }
413 Err(e) => {
414 debug!(error = %e, "Search failed for query");
415 }
416 }
417 }
418
419 let mut proofs_valid = 0u32;
422 let mut proofs_invalid = 0u32;
423 let all_docs = local_index
424 .get_all_documents()
425 .unwrap_or_default();
426 for doc in &all_docs {
427 if doc.hierarchical_hash == [0u8; 32] {
428 proofs_invalid += 1;
430 continue;
431 }
432 let chunks = local_index.get_chunks_for_doc(doc.id).unwrap_or_default();
433 if chunks.is_empty() {
434 proofs_invalid += 1;
435 continue;
436 }
437 let chunk_hashes: Vec<[u8; 32]> = chunks.iter().map(|c| c.text_hash).collect();
438 let recomputed = cp_core::Document::compute_hierarchical_hash(&chunk_hashes);
439 if recomputed == doc.hierarchical_hash {
440 proofs_valid += 1;
441 } else {
442 proofs_invalid += 1;
443 }
444 }
445
446 let metrics = if query_count > 0 {
447 QualityMetrics {
448 precision_at_10: precision_sum / query_count as f32,
449 ndcg_at_10: ndcg_sum / query_count as f32,
450 mrr: mrr_sum / query_count as f32,
451 merkle_proofs_valid: proofs_valid,
452 merkle_proofs_invalid: proofs_invalid,
453 adapter_perplexity_delta: None,
454 }
455 } else {
456 QualityMetrics {
457 precision_at_10: 0.0,
458 ndcg_at_10: 0.0,
459 mrr: 0.0,
460 merkle_proofs_valid: proofs_valid,
461 merkle_proofs_invalid: proofs_invalid,
462 adapter_perplexity_delta: None,
463 }
464 };
465
466 let window = ValidationWindow::current();
467 let now = std::time::SystemTime::now()
468 .duration_since(std::time::UNIX_EPOCH)
469 .unwrap()
470 .as_millis() as i64;
471
472 let mut result = EvaluationResult {
473 contributor_key,
474 validator_node_id,
475 window_id: window.window_id,
476 metrics,
477 timestamp: now,
478 signature: [0u8; 64],
479 };
480
481 let hash = result.signing_bytes();
483 let sig = signing_key.sign(&hash);
484 result.signature = sig.to_bytes();
485
486 Ok(result)
487}
488
489pub async fn evaluate_peer(
495 peer: &PeerRegistration,
496 test_queries: &[TestQuery],
497 validator_node_id: [u8; 16],
498 signing_key: &SigningKey,
499 model_hash: [u8; 32],
500 tor_runtime: &cp_tor::TorRuntime,
501) -> Result<PeerTestResult, crate::ValidatorError> {
502 info!(
503 peer = hex::encode(peer.node_id),
504 onion = &peer.onion_address,
505 "Testing live peer"
506 );
507
508 let selected = if test_queries.len() > 5 {
510 &test_queries[..5]
511 } else {
512 test_queries
513 };
514
515 let mut precision_sum = 0.0f32;
516 let mut ndcg_sum = 0.0f32;
517 let mut mrr_sum = 0.0f32;
518 let mut latency_sum = 0u64;
519 let mut valid_proofs = 0u32;
520 let mut total_proofs = 0u32;
521 let mut successful_queries = 0u32;
522
523 let session_key = cp_tor::SessionKey::derive_with_salt(
525 &signing_key.to_bytes(),
526 &blake3::hash(b"validator-peer-test").as_bytes()[..32].try_into().unwrap(),
527 );
528
529 for query in selected {
530 let start = Instant::now();
531
532 let response_result = tokio::time::timeout(
534 std::time::Duration::from_secs(5),
535 send_query_to_peer(
536 tor_runtime,
537 &peer.onion_address,
538 &query.query_embedding,
539 Some(query.query_text.clone()),
540 model_hash,
541 &session_key,
542 &peer.public_key,
543 ),
544 )
545 .await;
546
547 let elapsed_ms = start.elapsed().as_millis() as u64;
548
549 match response_result {
550 Ok(Ok(response)) if response.status == SearchStatus::Ok => {
551 latency_sum += elapsed_ms;
552
553 let returned_uuids: Vec<Uuid> = response
554 .results
555 .iter()
556 .map(|r| Uuid::from_bytes(r.chunk_id))
557 .collect();
558
559 let relevant_uuids: Vec<Uuid> = query.relevant_chunk_ids.clone();
560
561 precision_sum += precision_at_k(&returned_uuids, &relevant_uuids, 10);
562 ndcg_sum += ndcg_at_k(
563 &returned_uuids,
564 &relevant_uuids,
565 &query.relevance_grade_map(),
566 10,
567 );
568 mrr_sum += mrr(&returned_uuids, &relevant_uuids);
569
570 for result in &response.results {
572 if let Some(proof_hashes) = &result.merkle_proof {
573 total_proofs += 1;
574 let leaf = *blake3::hash(&result.chunk_id).as_bytes();
581 let computed_root = proof_hashes.iter().fold(leaf, |current, sibling| {
582 let mut hasher = blake3::Hasher::new();
583 if current <= *sibling {
584 hasher.update(¤t);
585 hasher.update(sibling);
586 } else {
587 hasher.update(sibling);
588 hasher.update(¤t);
589 }
590 *hasher.finalize().as_bytes()
591 });
592 if computed_root == response.peer_state_root {
593 valid_proofs += 1;
594 }
595 }
596 }
597
598 successful_queries += 1;
599 }
600 Ok(Ok(response)) => {
601 debug!(status = ?response.status, "Peer returned non-OK status");
602 }
603 Ok(Err(e)) => {
604 debug!(error = %e, "Search request failed");
605 }
606 Err(_) => {
607 debug!("Search request timed out");
608 }
609 }
610 }
611
612 let metrics = if successful_queries > 0 {
613 PeerQualityMetrics {
614 precision_at_10: precision_sum / successful_queries as f32,
615 ndcg_at_10: ndcg_sum / successful_queries as f32,
616 mrr: mrr_sum / successful_queries as f32,
617 response_latency_ms: (latency_sum / successful_queries as u64).min(u16::MAX as u64) as u16,
618 proof_validity_rate: if total_proofs > 0 {
619 valid_proofs as f32 / total_proofs as f32
620 } else {
621 0.0
622 },
623 availability_rate: 1.0,
624 }
625 } else {
626 PeerQualityMetrics {
627 precision_at_10: 0.0,
628 ndcg_at_10: 0.0,
629 mrr: 0.0,
630 response_latency_ms: 0,
631 proof_validity_rate: 0.0,
632 availability_rate: 0.0,
633 }
634 };
635
636 let window = ValidationWindow::current();
637 let now = std::time::SystemTime::now()
638 .duration_since(std::time::UNIX_EPOCH)
639 .unwrap()
640 .as_millis() as i64;
641
642 let mut result = PeerTestResult {
643 peer_node_id: peer.node_id,
644 peer_onion: peer.onion_address.clone(),
645 validator_node_id,
646 window_id: window.window_id,
647 metrics,
648 timestamp: now,
649 signature: [0u8; 64],
650 };
651
652 let hash = result.signing_bytes();
653 let sig = signing_key.sign(&hash);
654 result.signature = sig.to_bytes();
655
656 Ok(result)
657}
658
659async fn send_query_to_peer(
661 tor_runtime: &cp_tor::TorRuntime,
662 onion_address: &str,
663 query_embedding: &[i16],
664 query_text: Option<String>,
665 model_hash: [u8; 32],
666 session_key: &cp_tor::SessionKey,
667 peer_public_key: &[u8; 32],
668) -> Result<SearchResponse, crate::ValidatorError> {
669 let mut stream = tor_runtime
670 .connect_to_peer(onion_address)
671 .await
672 .map_err(|e| crate::ValidatorError::Tor(e.to_string()))?;
673
674 let response = cp_tor::search(
675 &mut stream,
676 query_embedding.to_vec(),
677 query_text,
678 10,
679 true,
680 model_hash,
681 session_key,
682 peer_public_key,
683 )
684 .await
685 .map_err(|e| crate::ValidatorError::Tor(e.to_string()))?;
686
687 Ok(response)
688}
689
690pub fn update_contributor_ratings(
696 results: &[EvaluationResult],
697 ratings: &mut HashMap<[u8; 32], (Rating, u64, i64)>,
698) {
699 if results.len() < 2 {
700 return;
701 }
702
703 let mut ranked: Vec<&EvaluationResult> = results.iter().collect();
705 ranked.sort_by(|a, b| {
706 composite_score(&b.metrics)
707 .partial_cmp(&composite_score(&a.metrics))
708 .unwrap_or(std::cmp::Ordering::Equal)
709 });
710
711 let now = std::time::SystemTime::now()
712 .duration_since(std::time::UNIX_EPOCH)
713 .unwrap()
714 .as_millis() as i64;
715
716 for i in 0..ranked.len() - 1 {
718 let winner_key = ranked[i].contributor_key;
719 let loser_key = ranked[i + 1].contributor_key;
720
721 let winner_entry = ratings
722 .entry(winner_key)
723 .or_insert((Rating::default(), 0, now));
724 let winner_rating = winner_entry.0;
725
726 let loser_entry = ratings
727 .entry(loser_key)
728 .or_insert((Rating::default(), 0, now));
729 let loser_rating = loser_entry.0;
730
731 let (new_winner, new_loser) =
732 crate::rating::pairwise_update(&winner_rating, &loser_rating);
733
734 ratings.get_mut(&winner_key).unwrap().0 = new_winner;
735 ratings.get_mut(&winner_key).unwrap().1 += 1;
736 ratings.get_mut(&winner_key).unwrap().2 = now;
737
738 ratings.get_mut(&loser_key).unwrap().0 = new_loser;
739 ratings.get_mut(&loser_key).unwrap().1 += 1;
740 ratings.get_mut(&loser_key).unwrap().2 = now;
741 }
742}
743
744pub fn update_peer_ratings(
750 results: &[PeerTestResult],
751 ratings: &mut HashMap<[u8; 16], (Rating, u64, i64)>,
752) {
753 if results.len() < 2 {
754 return;
755 }
756
757 let mut ranked: Vec<&PeerTestResult> = results.iter().collect();
758 ranked.sort_by(|a, b| {
759 peer_composite_score(&b.metrics)
760 .partial_cmp(&peer_composite_score(&a.metrics))
761 .unwrap_or(std::cmp::Ordering::Equal)
762 });
763
764 let now = std::time::SystemTime::now()
765 .duration_since(std::time::UNIX_EPOCH)
766 .unwrap()
767 .as_millis() as i64;
768
769 for i in 0..ranked.len() - 1 {
770 let winner_key = ranked[i].peer_node_id;
771 let loser_key = ranked[i + 1].peer_node_id;
772
773 let winner_entry = ratings
774 .entry(winner_key)
775 .or_insert((Rating::default(), 0, now));
776 let winner_rating = winner_entry.0;
777
778 let loser_entry = ratings
779 .entry(loser_key)
780 .or_insert((Rating::default(), 0, now));
781 let loser_rating = loser_entry.0;
782
783 let (new_winner, new_loser) =
784 crate::rating::pairwise_update(&winner_rating, &loser_rating);
785
786 ratings.get_mut(&winner_key).unwrap().0 = new_winner;
787 ratings.get_mut(&winner_key).unwrap().1 += 1;
788 ratings.get_mut(&winner_key).unwrap().2 = now;
789
790 ratings.get_mut(&loser_key).unwrap().0 = new_loser;
791 ratings.get_mut(&loser_key).unwrap().1 += 1;
792 ratings.get_mut(&loser_key).unwrap().2 = now;
793 }
794}
795
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixtures -------------------------------------------------------

    /// Builds `n` UUIDs whose first byte is the index and whose other bytes
    /// are zero.
    fn make_uuids(n: usize) -> Vec<Uuid> {
        let mut ids = Vec::with_capacity(n);
        for index in 0..n {
            let mut raw = [0u8; 16];
            raw[0] = index as u8;
            ids.push(Uuid::from_bytes(raw));
        }
        ids
    }

    /// A UUID that `make_uuids` never produces.
    fn unknown_uuid() -> Uuid {
        Uuid::from_bytes([99u8; 16])
    }

    /// Asserts that two scores agree within `1e-6`.
    fn assert_near(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < 1e-6);
    }

    /// Contributor metrics with explicit retrieval scores and proof counters.
    fn quality(
        precision: f32,
        ndcg: f32,
        reciprocal_rank: f32,
        valid: u32,
        invalid: u32,
        delta: Option<f32>,
    ) -> QualityMetrics {
        QualityMetrics {
            precision_at_10: precision,
            ndcg_at_10: ndcg,
            mrr: reciprocal_rank,
            merkle_proofs_valid: valid,
            merkle_proofs_invalid: invalid,
            adapter_perplexity_delta: delta,
        }
    }

    /// Peer metrics where all retrieval scores equal `relevance` and both
    /// reliability rates equal `reliability`.
    fn peer_quality(relevance: f32, latency_ms: u16, reliability: f32) -> PeerQualityMetrics {
        PeerQualityMetrics {
            precision_at_10: relevance,
            ndcg_at_10: relevance,
            mrr: relevance,
            response_latency_ms: latency_ms,
            proof_validity_rate: reliability,
            availability_rate: reliability,
        }
    }

    /// An unsigned evaluation result for the contributor `[key_byte; 32]`.
    fn evaluation(key_byte: u8, metrics: QualityMetrics) -> EvaluationResult {
        EvaluationResult {
            contributor_key: [key_byte; 32],
            validator_node_id: [0u8; 16],
            window_id: 100,
            metrics,
            timestamp: 0,
            signature: [0u8; 64],
        }
    }

    // ---- precision_at_k -------------------------------------------------

    #[test]
    fn test_precision_at_k_all_relevant() {
        let ids = make_uuids(10);
        assert_near(precision_at_k(&ids, &ids.clone(), 10), 1.0);
    }

    #[test]
    fn test_precision_at_k_none_relevant() {
        let returned = make_uuids(10);
        assert_near(precision_at_k(&returned, &[unknown_uuid()], 10), 0.0);
    }

    #[test]
    fn test_precision_at_k_half_relevant() {
        let returned = make_uuids(10);
        let relevant = returned[..5].to_vec();
        assert_near(precision_at_k(&returned, &relevant, 10), 0.5);
    }

    #[test]
    fn test_precision_at_k_fewer_results_than_k() {
        // Three hits out of k = 10: the denominator stays k.
        let returned = make_uuids(3);
        assert_near(precision_at_k(&returned, &returned.clone(), 10), 0.3);
    }

    #[test]
    fn test_precision_at_k_zero() {
        let returned = make_uuids(5);
        assert_near(precision_at_k(&returned, &returned.clone(), 0), 0.0);
    }

    #[test]
    fn test_precision_at_k_empty_returned() {
        let relevant = make_uuids(5);
        assert_near(precision_at_k(&[], &relevant, 10), 0.0);
    }

    // ---- ndcg_at_k ------------------------------------------------------

    #[test]
    fn test_ndcg_at_k_perfect_ranking() {
        let ids = make_uuids(5);
        // Grades fall from 5 to 1 in ranking order, i.e. the ideal ordering.
        let grades: HashMap<Uuid, u8> = ids
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, (5 - i) as u8))
            .collect();
        let score = ndcg_at_k(&ids, &ids.clone(), &grades, 5);
        assert!((score - 1.0).abs() < 1e-5, "Perfect NDCG should be 1.0, got {}", score);
    }

    #[test]
    fn test_ndcg_at_k_no_relevant() {
        let returned = make_uuids(5);
        let nothing_relevant: Vec<Uuid> = Vec::new();
        assert_near(ndcg_at_k(&returned, &nothing_relevant, &HashMap::new(), 5), 0.0);
    }

    #[test]
    fn test_ndcg_at_k_binary_relevance() {
        // The only relevant id sits at the last of five ranks.
        let returned = make_uuids(5);
        let relevant = vec![returned[4]];
        let score = ndcg_at_k(&returned, &relevant, &HashMap::new(), 5);
        assert!(score > 0.3 && score < 0.5, "Got NDCG = {}", score);
    }

    #[test]
    fn test_ndcg_at_k_zero_k() {
        let ids = make_uuids(5);
        assert_near(ndcg_at_k(&ids, &ids, &HashMap::new(), 0), 0.0);
    }

    // ---- mrr ------------------------------------------------------------

    #[test]
    fn test_mrr_first_position() {
        let returned = make_uuids(5);
        assert_near(mrr(&returned, &[returned[0]]), 1.0);
    }

    #[test]
    fn test_mrr_second_position() {
        let returned = make_uuids(5);
        assert_near(mrr(&returned, &[returned[1]]), 0.5);
    }

    #[test]
    fn test_mrr_third_position() {
        let returned = make_uuids(5);
        let score = mrr(&returned, &[returned[2]]);
        assert!((score - 1.0 / 3.0).abs() < 1e-6, "Expected 1/3, got {}", score);
    }

    #[test]
    fn test_mrr_no_relevant() {
        let returned = make_uuids(5);
        assert_near(mrr(&returned, &[unknown_uuid()]), 0.0);
    }

    #[test]
    fn test_mrr_empty() {
        let relevant = make_uuids(3);
        assert_near(mrr(&[], &relevant), 0.0);
    }

    #[test]
    fn test_mrr_multiple_relevant_returns_first() {
        let returned = make_uuids(5);
        assert_near(mrr(&returned, &[returned[1], returned[3]]), 0.5);
    }

    // ---- composite_score ------------------------------------------------

    #[test]
    fn test_composite_score_perfect() {
        let score = composite_score(&quality(1.0, 1.0, 1.0, 10, 0, None));
        assert!((score - 1.0).abs() < 1e-6, "Perfect score should be 1.0, got {}", score);
    }

    #[test]
    fn test_composite_score_zero() {
        assert_near(composite_score(&quality(0.0, 0.0, 0.0, 0, 0, None)), 0.0);
    }

    #[test]
    fn test_composite_score_adapter_penalty() {
        let score = composite_score(&quality(1.0, 1.0, 1.0, 10, 0, Some(0.5)));
        assert!((score - 0.8).abs() < 1e-6, "Penalized score should be 0.8, got {}", score);
    }

    #[test]
    fn test_composite_score_adapter_improvement() {
        // A negative delta must not be penalized.
        assert_near(composite_score(&quality(1.0, 1.0, 1.0, 10, 0, Some(-0.5))), 1.0);
    }

    // ---- peer_composite_score -------------------------------------------

    #[test]
    fn test_peer_composite_score_fast_good_peer() {
        let score = peer_composite_score(&peer_quality(1.0, 0, 1.0));
        assert!((score - 0.79).abs() < 0.01, "Got {}", score);
    }

    #[test]
    fn test_peer_composite_score_slow_peer() {
        let score = peer_composite_score(&peer_quality(1.0, 5000, 1.0));
        assert!((score - 0.69).abs() < 0.01, "Got {}", score);
    }

    #[test]
    fn test_peer_composite_score_unreachable() {
        // Only the latency term (latency 0) contributes here.
        let score = peer_composite_score(&peer_quality(0.0, 0, 0.0));
        assert!((score - 0.1).abs() < 0.01, "Got {}", score);
    }

    // ---- update_contributor_ratings -------------------------------------

    #[test]
    fn test_update_contributor_ratings_single_result() {
        let only = evaluation(1, quality(0.8, 0.7, 0.9, 5, 0, None));

        let mut ratings = HashMap::new();
        update_contributor_ratings(&[only], &mut ratings);
        assert!(ratings.is_empty());
    }

    #[test]
    fn test_update_contributor_ratings_two_results() {
        let good = evaluation(1, quality(0.9, 0.9, 0.9, 10, 0, None));
        let bad = evaluation(2, quality(0.1, 0.1, 0.1, 1, 9, None));

        let mut ratings = HashMap::new();
        update_contributor_ratings(&[good, bad], &mut ratings);

        let good_rating = ratings.get(&[1u8; 32]).unwrap();
        let bad_rating = ratings.get(&[2u8; 32]).unwrap();

        assert!(
            good_rating.0.mu > bad_rating.0.mu,
            "Good contributor mu ({}) should exceed bad contributor mu ({})",
            good_rating.0.mu,
            bad_rating.0.mu
        );
    }
}