1pub mod adversarial;
16pub mod case;
17pub mod fault_report;
18pub mod fixtures;
19pub mod ranking_metrics;
20pub mod recorder;
21pub mod regression;
22pub mod stability_tests;
23pub mod suite;
24pub mod trial;
25
26pub use trial::{ConfidenceInterval95, EvaluationStats, TrialResult};
30
31pub use case::{AlwaysFailCase, AlwaysPassCase, EvaluationCase, StochasticCase};
33
34pub use suite::{EvaluationSuite, SuiteConfig, SuiteResult};
36
37pub use recorder::{SequenceDiff, ToolCallRecord, ToolSequenceRecorder};
39
40pub use adversarial::{AdversarialTestCase, AdversarialTestType};
42
43pub use regression::{
45 CategoryBaseline, CategoryRegressionResult, RegressionConfig, RegressionResult, RegressionSuite,
46};
47
48pub use stability_tests::{
50 GoalPreservationCase, LoopDetectionSimCase, long_horizon_stability_suite,
51};
52
53pub use fault_report::{FaultKind, FaultReport, analyze_suite_for_faults};
55
56pub use fixtures::{
58 Assertion, ExpectedBehavior, Fixture, FixtureCase, FixtureMessage, FixtureRunner, RunOutcome,
59 load_fixture_file, load_fixtures_from_dir,
60};
61
62pub use ranking_metrics::{mrr, ndcg_at_k, precision_at_k};