// rig_retrieval_evals/lib.rs

1//! # rig-retrieval-evals
2//!
3//! Retrieval and knowledge-base evaluation harness for
4//! [Rig](https://crates.io/crates/rig-core) agents.
5//!
6//! The crate gives you:
7//!
8//! - A BEIR-compatible [`dataset::Qrels`] loader (JSONL and BEIR/BRIGHT
9//!   `queries.jsonl` + `qrels/<split>.tsv` via [`dataset::Qrels::from_beir`]).
10//! - A pure-Rust catalogue of standard IR metrics (Recall, Precision, MRR,
11//!   MAP, nDCG, HitRate) in [`retrieval`].
12//! - An async [`harness::RetrievalHarness`] that drives any store
13//!   implementing [`rig::vector_store::VectorStoreIndexDyn`], plus a
14//!   [`retriever::Retriever`] seam for scoring non-vector backends (lexical /
15//!   BM25 / hybrid) with the same metrics.
16//! - A deterministic, seeded [`synthetic`] corpus + qrels generator for
17//!   reproducible benchmarks and fixture-free tests.
18//! - JSON / Markdown [`report::MultiReport`]s with baseline diffing.
19//!
20//! See the crate README for an end-to-end quickstart.
21//!
22//! ## Stability
23//!
24//! The default build ships retrieval-quality evaluation only. Optional features
25//! add RAGAS-style judges, zero-waste ingestion checks, knowledge-gain scoring,
26//! and optional embedding novelty adapters.
27
28#![deny(missing_docs)]
29#![deny(rust_2018_idioms)]
30#![forbid(unsafe_code)]
31
32#[cfg(feature = "agents")]
33pub mod agents;
34pub mod dataset;
35pub mod error;
36pub mod harness;
37#[cfg(feature = "knowledge-gain")]
38pub mod knowledge_gain;
39#[cfg(feature = "memory")]
40pub mod memory;
41#[cfg(feature = "models")]
42pub mod models;
43#[cfg(feature = "observe")]
44pub mod observe;
45#[cfg(feature = "ragas")]
46pub mod ragas;
47pub mod report;
48pub mod retrieval;
49pub mod retriever;
50#[cfg(feature = "shadow")]
51pub mod shadow;
52pub mod staleness;
53pub mod synthetic;
54
55#[cfg(feature = "agents")]
56pub use agents::{
57    AgentEvalReport, AgentEvalResult, AgentEvalRunner, AgentEvalTask, AgentEvalTaskSet,
58    AgentHarness, AgentObservation, AgentToolCall,
59};
60pub use dataset::{GoldQuery, Qrels, RetrievedDoc, RetrievedSet};
61pub use error::{Error, Result};
62pub use harness::RetrievalHarness;
63#[cfg(feature = "knowledge-gain")]
64pub use knowledge_gain::{
65    CandidateDocumentGain, CandidateDocumentGainInput, CandidateQueryGain, KnowledgeGainConfig,
66    KnowledgeGainReport, MetricGain, QueryGain,
67};
68#[cfg(feature = "embedding-novelty")]
69pub use knowledge_gain::{CandidateNoveltyInput, EmbeddingNoveltyAdapter};
70#[cfg(feature = "memory")]
71pub use memory::{
72    MemoryEvalReport, MemoryEvalResult, MemoryHarness, MemoryObservation, MemoryRunner, MemoryTask,
73    MemoryTaskSet,
74};
75#[cfg(feature = "models")]
76pub use models::{
77    ModelBehaviorHarness, ModelBehaviorReport, ModelBehaviorResult, ModelBehaviorTask,
78    ModelBehaviorTaskSet, ModelObservation, ModelRunner,
79};
80#[cfg(feature = "observe")]
81pub use observe::{
82    EvalEnvelope, EvalKind, SCHEMA_VERSION as OBSERVE_SCHEMA_VERSION, diff_envelopes, emit_diff,
83    emit_report, report_envelopes,
84};
85pub use report::{
86    FreshnessQueryRollup, FreshnessReport, MetricCi, MetricDelta, MetricReport, MultiReport,
87    QueryDelta, QueryReliability, RegressionGate, ReliabilityReport, ReportDiff,
88};
89pub use retrieval::{HitRateAtK, MapAtK, Mrr, NdcgAtK, PrecisionAtK, RecallAtK, RetrievalMetric};
90pub use retriever::{Retriever, VectorStoreRetriever, retrieve_all, score_retriever};
91#[cfg(feature = "shadow")]
92pub use shadow::{EvalShadowStore, ShadowEvalReport};
93pub use staleness::{
94    ConflictGroup, ConflictReport, CorpusVersions, StaleHit, StalenessAnnotation, StalenessReport,
95    detect_conflicts, detect_stale_hits,
96};
97pub use synthetic::{
98    SyntheticConfig, SyntheticCorpus, SyntheticDoc, generate as generate_synthetic,
99};
100
101#[cfg(feature = "ingestion")]
102pub mod ingestion;
103
104#[cfg(feature = "skills")]
105pub mod skills;
106
107#[cfg(feature = "ingestion-graph")]
108pub use ingestion::PetgraphBaseline;
109
110#[cfg(feature = "ingestion")]
111pub use ingestion::{
112    ActiveGraphTrack, ActivePropositionTrack, Chunk, ChunkLintConfig, ChunkLintReport,
113    ChunkLintWarning, ChunkStats, DistillationPipeline, Document, Dropped, DroppedItem,
114    DroppedReason, EncodingLintWarning, GraphBaseline, GraphTrack, InMemoryGraphBaseline,
115    InMemoryIocBaseline, IngestionDelta, IngestionReport, Ioc, IocBaseline, IocExtractor, IocKind,
116    LanguageCount, LanguageLintConfig, LlmPropositionExtractor, LlmTripleExtractor,
117    NearDuplicateLintConfig, NearDuplicatePair, NoGraphTrack, NoPropositionTrack, Proposition,
118    PropositionExtractor, PropositionTrack, RedundancyCheck, RedundancyVerdict, RegexIocExtractor,
119    Section, SectionKind, StubPropositionExtractor, StubTripleExtractor, Triple, TripleExtractor,
120    VectorStoreRedundancyCheck, corpus_jaccard_knowledge_gain, jaccard_knowledge_gain, lint_chunks,
121    lint_chunks_strict,
122};
// rig_retrieval_evals/lib.rs