//! Module root: declares the submodules listed below and re-exports a
//! selection of their public items, so callers can import those names from
//! this level instead of spelling out the full submodule path.

// Submodule declarations (alphabetical).
pub mod adversarial;
pub mod case;
pub mod fault_report;
pub mod ranking_metrics;
pub mod recorder;
pub mod regression;
pub mod stability_tests;
pub mod suite;
pub mod trial;

// Flat re-exports, kept in the same order as the module declarations above
// so it is easy to see which submodule each name comes from.
pub use adversarial::{AdversarialTestCase, AdversarialTestType};
pub use case::{AlwaysFailCase, AlwaysPassCase, EvaluationCase, StochasticCase};
pub use fault_report::{FaultKind, FaultReport, analyze_suite_for_faults};
pub use ranking_metrics::{mrr, ndcg_at_k, precision_at_k};
pub use recorder::{SequenceDiff, ToolCallRecord, ToolSequenceRecorder};
pub use regression::{
    CategoryBaseline, CategoryRegressionResult, RegressionConfig, RegressionResult, RegressionSuite,
};
pub use stability_tests::{
    GoalPreservationCase, LoopDetectionSimCase, long_horizon_stability_suite,
};
pub use suite::{EvaluationSuite, SuiteConfig, SuiteResult};
pub use trial::{ConfidenceInterval95, EvaluationStats, TrialResult};