oxirs_embed/application_tasks/
mod.rs1pub mod classification;
8pub mod clustering;
9pub mod query_answering;
10pub mod recommendation;
11pub mod retrieval;
12pub mod search;
13
14use crate::EmbeddingModel;
15use anyhow::{anyhow, Result};
16use serde::{Deserialize, Serialize};
17use std::collections::VecDeque;
18use std::sync::{Arc, RwLock};
19use std::time::Instant;
20
21pub use classification::{
23 ClassResults, ClassificationEvaluator, ClassificationMetric, ClassificationReport,
24 ClassificationResults, SimpleClassifier,
25};
26pub use clustering::{
27 ClusterAnalysis, ClusteringEvaluator, ClusteringMetric, ClusteringResults,
28 ClusteringStabilityAnalysis,
29};
30pub use query_answering::{
31 ApplicationQueryAnsweringEvaluator, ComplexityResults, QueryAnsweringMetric,
32 QueryAnsweringResults, QueryComplexity, QueryResult, QueryType, QuestionAnswerPair,
33 ReasoningAnalysis, TypeResults,
34};
35pub use recommendation::{
36 ABTestResults, CoverageStats, DiversityAnalysis, InteractionType, ItemMetadata,
37 RecommendationEvaluator, RecommendationMetric, RecommendationResults, UserInteraction,
38 UserRecommendationResults,
39};
40pub use retrieval::{
41 DocumentMetadata, RetrievalAnalysis, RetrievalEvaluator, RetrievalMetric, RetrievalQuery,
42 RetrievalResults,
43};
44pub use search::{
45 QueryPerformanceAnalysis, QueryResults, RelevanceJudgment, SearchEffectivenessMetrics,
46 SearchEvaluator, SearchMetric, SearchResults,
47};
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct ApplicationEvalConfig {
52 pub enable_recommendation_eval: bool,
54 pub enable_search_eval: bool,
56 pub enable_clustering_eval: bool,
58 pub enable_classification_eval: bool,
60 pub enable_retrieval_eval: bool,
62 pub enable_query_answering_eval: bool,
64 pub sample_size: usize,
66 pub num_recommendations: usize,
68 pub num_clusters: usize,
70 pub cv_folds: usize,
72 pub enable_user_satisfaction: bool,
74 pub num_query_tests: usize,
76}
77
78impl Default for ApplicationEvalConfig {
79 fn default() -> Self {
80 Self {
81 enable_recommendation_eval: true,
82 enable_search_eval: true,
83 enable_clustering_eval: true,
84 enable_classification_eval: true,
85 enable_retrieval_eval: true,
86 enable_query_answering_eval: true,
87 sample_size: 1000,
88 num_recommendations: 10,
89 num_clusters: 5,
90 cv_folds: 5,
91 enable_user_satisfaction: true,
92 num_query_tests: 100,
93 }
94 }
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ApplicationEvalResults {
100 pub timestamp: chrono::DateTime<chrono::Utc>,
102 pub model_id: String,
104 pub recommendation_results: Option<RecommendationResults>,
106 pub search_results: Option<SearchResults>,
108 pub clustering_results: Option<ClusteringResults>,
110 pub classification_results: Option<ClassificationResults>,
112 pub retrieval_results: Option<RetrievalResults>,
114 pub query_answering_results: Option<QueryAnsweringResults>,
116 pub overall_score: f64,
118 pub evaluation_time: f64,
120}
121
122pub struct ApplicationTaskEvaluator {
124 config: ApplicationEvalConfig,
126 recommendation_evaluator: RecommendationEvaluator,
128 search_evaluator: SearchEvaluator,
129 clustering_evaluator: ClusteringEvaluator,
130 classification_evaluator: ClassificationEvaluator,
131 retrieval_evaluator: RetrievalEvaluator,
132 query_answering_evaluator: ApplicationQueryAnsweringEvaluator,
133 evaluation_history: Arc<RwLock<VecDeque<ApplicationEvalResults>>>,
135}
136
137impl ApplicationTaskEvaluator {
138 pub fn new(config: ApplicationEvalConfig) -> Self {
140 Self {
141 config,
142 recommendation_evaluator: RecommendationEvaluator::new(),
143 search_evaluator: SearchEvaluator::new(),
144 clustering_evaluator: ClusteringEvaluator::new(),
145 classification_evaluator: ClassificationEvaluator::new(),
146 retrieval_evaluator: RetrievalEvaluator::new(),
147 query_answering_evaluator: ApplicationQueryAnsweringEvaluator::new(),
148 evaluation_history: Arc::new(RwLock::new(VecDeque::new())),
149 }
150 }
151
152 pub async fn evaluate_all_tasks(
154 &self,
155 model: &dyn EmbeddingModel,
156 ) -> Result<ApplicationEvalResults> {
157 let start_time = Instant::now();
158 let model_id = model.model_id().to_string();
159
160 let mut recommendation_results = None;
161 let mut search_results = None;
162 let mut clustering_results = None;
163 let mut classification_results = None;
164 let mut retrieval_results = None;
165 let mut query_answering_results = None;
166
167 if self.config.enable_recommendation_eval {
169 recommendation_results = Some(
170 self.recommendation_evaluator
171 .evaluate(model, &self.config)
172 .await?,
173 );
174 }
175
176 if self.config.enable_search_eval {
177 search_results = Some(self.search_evaluator.evaluate(model, &self.config).await?);
178 }
179
180 if self.config.enable_clustering_eval {
181 clustering_results = Some(
182 self.clustering_evaluator
183 .evaluate(model, &self.config)
184 .await?,
185 );
186 }
187
188 if self.config.enable_classification_eval {
189 classification_results = Some(
190 self.classification_evaluator
191 .evaluate(model, &self.config)
192 .await?,
193 );
194 }
195
196 if self.config.enable_retrieval_eval {
197 retrieval_results = Some(
198 self.retrieval_evaluator
199 .evaluate(model, &self.config)
200 .await?,
201 );
202 }
203
204 if self.config.enable_query_answering_eval {
205 query_answering_results = Some(
206 self.query_answering_evaluator
207 .evaluate(model, &self.config)
208 .await?,
209 );
210 }
211
212 let evaluation_time = start_time.elapsed().as_secs_f64();
213
214 let overall_score = self.calculate_overall_score(
216 &recommendation_results,
217 &search_results,
218 &clustering_results,
219 &classification_results,
220 &retrieval_results,
221 &query_answering_results,
222 );
223
224 let results = ApplicationEvalResults {
225 timestamp: chrono::Utc::now(),
226 model_id,
227 recommendation_results,
228 search_results,
229 clustering_results,
230 classification_results,
231 retrieval_results,
232 query_answering_results,
233 overall_score,
234 evaluation_time,
235 };
236
237 if let Ok(mut history) = self.evaluation_history.write() {
239 history.push_back(results.clone());
240 if history.len() > 100 {
241 history.pop_front();
242 }
243 }
244
245 Ok(results)
246 }
247
248 fn calculate_overall_score(
250 &self,
251 recommendation: &Option<RecommendationResults>,
252 search: &Option<SearchResults>,
253 clustering: &Option<ClusteringResults>,
254 classification: &Option<ClassificationResults>,
255 retrieval: &Option<RetrievalResults>,
256 query_answering: &Option<QueryAnsweringResults>,
257 ) -> f64 {
258 let mut total_score = 0.0;
259 let mut component_count = 0;
260
261 if let Some(rec) = recommendation {
262 let precision = rec.metric_scores.get("PrecisionAtK(5)").unwrap_or(&0.0);
264 let coverage = rec.metric_scores.get("Coverage").unwrap_or(&0.0);
265 total_score += (precision + coverage) / 2.0;
266 component_count += 1;
267 }
268
269 if let Some(search) = search {
270 let ndcg = search.metric_scores.get("NDCG(10)").unwrap_or(&0.0);
271 let map = search.metric_scores.get("MAP").unwrap_or(&0.0);
272 total_score += (ndcg + map) / 2.0;
273 component_count += 1;
274 }
275
276 if let Some(clustering) = clustering {
277 let silhouette = clustering
278 .metric_scores
279 .get("SilhouetteScore")
280 .unwrap_or(&0.0);
281 total_score += silhouette.abs(); component_count += 1;
283 }
284
285 if let Some(classification) = classification {
286 let accuracy = classification.metric_scores.get("Accuracy").unwrap_or(&0.0);
287 let f1 = classification.metric_scores.get("F1Score").unwrap_or(&0.0);
288 total_score += (accuracy + f1) / 2.0;
289 component_count += 1;
290 }
291
292 if let Some(retrieval) = retrieval {
293 let precision = retrieval
294 .metric_scores
295 .get("PrecisionAtK(10)")
296 .unwrap_or(&0.0);
297 let recall = retrieval.metric_scores.get("RecallAtK(10)").unwrap_or(&0.0);
298 total_score += (precision + recall) / 2.0;
299 component_count += 1;
300 }
301
302 if let Some(qa) = query_answering {
303 total_score += qa.overall_accuracy;
304 component_count += 1;
305 }
306
307 if component_count > 0 {
308 total_score / component_count as f64
309 } else {
310 0.0
311 }
312 }
313
314 pub fn get_evaluation_history(&self) -> Result<Vec<ApplicationEvalResults>> {
316 match self.evaluation_history.read() {
317 Ok(history) => Ok(history.iter().cloned().collect()),
318 _ => Err(anyhow!("Failed to read evaluation history")),
319 }
320 }
321
322 pub fn clear_history(&self) -> Result<()> {
324 match self.evaluation_history.write() {
325 Ok(mut history) => {
326 history.clear();
327 Ok(())
328 }
329 _ => Err(anyhow!("Failed to clear evaluation history")),
330 }
331 }
332
333 pub fn config(&self) -> &ApplicationEvalConfig {
335 &self.config
336 }
337
338 pub fn set_config(&mut self, config: ApplicationEvalConfig) {
340 self.config = config;
341 }
342}