1#![deny(unsafe_code)]
9pub mod aggregator;
26mod audit;
27mod budget;
28pub mod cache;
29#[cfg(feature = "cli")]
30pub mod ci;
31mod efficiency;
32mod environment_state;
33mod error;
34mod evaluator;
35#[cfg(feature = "judge-core")]
36pub mod evaluators;
37mod gate;
38#[cfg(feature = "generation")]
39pub mod generation;
40pub mod judge;
41mod match_;
42#[cfg(feature = "judge-core")]
43pub mod prompt;
44pub mod report;
45mod response;
46mod runner;
47mod score;
48mod semantic_tool_parameter;
49mod semantic_tool_selection;
50#[cfg(feature = "simulation")]
51pub mod simulation;
52mod store;
53#[cfg(feature = "telemetry")]
54pub mod telemetry;
55pub mod testing;
56#[cfg(feature = "trace-ingest")]
57pub mod trace;
58#[cfg(feature = "training-export")]
59pub mod training;
60mod trajectory;
61mod types;
62mod url_filter;
63#[cfg(feature = "yaml")]
64mod yaml;
65
66pub use aggregator::{Aggregator, AllPass, AnyPass, Average, Weighted};
69pub use audit::AuditedInvocation;
70pub use budget::BudgetEvaluator;
71pub use cache::{
72 CacheKey as TaskResultCacheKey, EvaluationDataStore, FingerprintContext,
73 LocalFileTaskResultStore, StoreError, canonicalize_fingerprint, tool_set_hash,
74};
75pub use efficiency::EfficiencyEvaluator;
76pub use environment_state::EnvironmentStateEvaluator;
77pub use error::EvalError;
78pub use evaluator::{Evaluator, EvaluatorRegistry};
79#[cfg(feature = "evaluator-agent")]
80pub use evaluators::agent::{
81 AgentToneEvaluator, InteractionsEvaluator, KnowledgeRetentionEvaluator,
82 LanguageDetectionEvaluator, PerceivedErrorEvaluator, TaskCompletionEvaluator,
83 TrajectoryAccuracyEvaluator, TrajectoryAccuracyWithRefEvaluator, UserSatisfactionEvaluator,
84};
85#[cfg(feature = "evaluator-code")]
86pub use evaluators::code::llm_judge::CodeLlmJudgeEvaluator;
87#[cfg(feature = "evaluator-code")]
88pub use evaluators::code::{
89 CargoCheckEvaluator, ClippyEvaluator, CodeExtractor, CodeExtractorStrategy,
90};
91#[cfg(feature = "evaluator-sandbox")]
92pub use evaluators::code::{
93 SandboxLimits, SandboxOutcome, SandboxRunner, SandboxedExecutionEvaluator, ShellRunner,
94 run_sandboxed,
95};
96#[cfg(feature = "multimodal")]
97pub use evaluators::multimodal::ImageSafetyEvaluator;
98#[cfg(feature = "evaluator-quality")]
99pub use evaluators::quality::{
100 CoherenceEvaluator, ConcisenessEvaluator, CorrectnessEvaluator, FaithfulnessEvaluator,
101 GoalSuccessRateEvaluator, HallucinationEvaluator, HelpfulnessEvaluator, LazinessEvaluator,
102 PlanAdherenceEvaluator, ResponseRelevanceEvaluator, assertion_implies_goal_completion,
103};
104#[cfg(feature = "evaluator-rag")]
105pub use evaluators::rag::{
106 DEFAULT_EMBEDDING_SIMILARITY_THRESHOLD, Embedder, EmbedderError, EmbeddingSimilarityEvaluator,
107 RAGGroundednessEvaluator, RAGHelpfulnessEvaluator, RAGRetrievalRelevanceEvaluator,
108};
109#[cfg(feature = "evaluator-safety")]
110pub use evaluators::safety::{
111 CodeInjectionEvaluator, FairnessEvaluator, HarmfulnessEvaluator, PIIClass, PIILeakageEvaluator,
112 PromptInjectionEvaluator, ToxicityEvaluator,
113};
114#[cfg(feature = "evaluator-simple")]
115pub use evaluators::simple::{ExactMatchEvaluator, LevenshteinDistanceEvaluator};
116#[cfg(feature = "evaluator-structured")]
117pub use evaluators::structured::{JsonMatchEvaluator, JsonSchemaEvaluator, KeyStrategy};
118#[cfg(feature = "judge-core")]
119pub use evaluators::{
120 Detail, DetailBuffer, DispatchError, DispatchOutcome, EvaluatorError, JudgeEvaluatorBuilder,
121 JudgeEvaluatorConfig, dispatch_judge, drive_judge_call, evaluate_with_builtin,
122 finish_metric_result, materialize_case_attachments,
123};
124pub use gate::{GateConfig, GateResult, check_gate};
125pub use judge::{
126 CacheKey, DEFAULT_JUDGE_CACHE_CAPACITY, JudgeCache, JudgeClient, JudgeError, JudgeFuture,
127 JudgeRegistry, JudgeRegistryBuilder, JudgeRegistryError, JudgeVerdict, RetryPolicy,
128};
129pub use match_::{MatchMode, TrajectoryMatcher};
130#[cfg(feature = "judge-core")]
131pub use prompt::{
132 BUILTIN_TEMPLATE_VERSIONS, JudgePromptTemplate, MinijinjaTemplate, PromptContext, PromptError,
133 PromptFamily, PromptTemplateRegistry,
134};
135#[cfg(feature = "html-report")]
136pub use report::HtmlReporter;
137pub use report::{
138 ConsoleReporter, JsonReporter, MarkdownReporter, Reporter, ReporterError, ReporterOutput,
139};
140#[cfg(feature = "langsmith")]
141pub use report::{LangSmithExportError, LangSmithExporter};
142pub use response::ResponseMatcher;
143pub use runner::{AgentFactory, EvalRunner, RunnerMetricSample};
144pub use score::{Score, Verdict};
145pub use semantic_tool_parameter::SemanticToolParameterEvaluator;
146pub use semantic_tool_selection::SemanticToolSelectionEvaluator;
147pub use store::{EvalStore, FsEvalStore};
148#[cfg(feature = "telemetry")]
149pub use telemetry::{EvalsTelemetry, EvalsTelemetryBuilder};
150pub use testing::{MockJudge, PanickingMockJudge, SlowMockJudge};
151#[cfg(feature = "trace-langfuse")]
152pub use trace::LangfuseTraceProvider;
153#[cfg(feature = "trace-otlp")]
154pub use trace::OtlpHttpTraceProvider;
155#[cfg(feature = "training-export")]
156pub use training::{
157 ChatMlExporter, DpoExporter, ExportError, ExportOptions, ScoredTrace, ShareGptExporter,
158 TrainingExporter, TrainingFormat, TrainingReporter, export_traces,
159};
160pub use trajectory::TrajectoryCollector;
161pub use types::{
162 Assertion, AssertionKind, Attachment, AttachmentError, BudgetConstraints, CASE_NAMESPACE,
163 CaseFingerprint, EnvironmentState, EvalCase, EvalCaseResult, EvalMetricResult, EvalSet,
164 EvalSetResult, EvalSummary, ExpectedToolCall, FewShotExample, InteractionExpectation,
165 Invocation, MaterializedAttachment, RecordedToolCall, ResponseCriteria, StateCapture,
166 ToolIntent, TurnRecord, validate_eval_case, validate_eval_set,
167};
168pub use url_filter::{DefaultUrlFilter, UrlFilter};
169#[cfg(feature = "yaml")]
170pub use yaml::load_eval_set_yaml;