use crate::episode::Episode;
use crate::search::{SearchField, SearchMode, SearchResult};
use chrono::{DateTime, Utc};
/// Standard normal critical values (z-scores) for common confidence levels.
pub mod z_scores {
/// z-score for a two-sided 90% confidence interval.
pub const CONFIDENCE_90: f64 = 1.645;
/// z-score for a two-sided 95% confidence interval.
pub const CONFIDENCE_95: f64 = 1.96;
/// z-score for a two-sided 99% confidence interval.
pub const CONFIDENCE_99: f64 = 2.576;
}
/// Lower bound of the Wilson score interval for a binomial proportion.
///
/// # Arguments
///
/// * `successes` - number of successful outcomes; values larger than `trials`
///   are treated as `trials`.
/// * `trials` - total number of outcomes.
/// * `z` - standard normal critical value for the desired confidence level.
///
/// # Returns
///
/// The lower bound clamped to `[0.0, 1.0]`, or `0.0` when `trials` is zero.
#[must_use]
pub fn wilson_lower_bound(successes: u64, trials: u64, z: f64) -> f64 {
    if trials == 0 {
        return 0.0;
    }
    let n = trials as f64;
    // A proportion above 1 is impossible; without this cap `p * (1 - p)` goes
    // negative, the square root yields NaN, and `clamp` passes NaN through.
    let p = successes.min(trials) as f64 / n;
    let z_squared = z * z;
    let centre = p + z_squared / (2.0 * n);
    let margin = z * (p * (1.0 - p) / n + z_squared / (4.0 * n * n)).sqrt();
    let denominator = 1.0 + z_squared / n;
    ((centre - margin) / denominator).clamp(0.0, 1.0)
}
/// Upper bound of the Wilson score interval for a binomial proportion.
///
/// # Arguments
///
/// * `successes` - number of successful outcomes; values larger than `trials`
///   are treated as `trials`.
/// * `trials` - total number of outcomes.
/// * `z` - standard normal critical value for the desired confidence level.
///
/// # Returns
///
/// The upper bound clamped to `[0.0, 1.0]`, or `1.0` when `trials` is zero.
#[must_use]
pub fn wilson_upper_bound(successes: u64, trials: u64, z: f64) -> f64 {
    if trials == 0 {
        return 1.0;
    }
    let n = trials as f64;
    // A proportion above 1 is impossible; without this cap `p * (1 - p)` goes
    // negative, the square root yields NaN, and `clamp` passes NaN through.
    let p = successes.min(trials) as f64 / n;
    let z_squared = z * z;
    let centre = p + z_squared / (2.0 * n);
    let margin = z * (p * (1.0 - p) / n + z_squared / (4.0 * n * n)).sqrt();
    let denominator = 1.0 + z_squared / n;
    ((centre + margin) / denominator).clamp(0.0, 1.0)
}
/// Success and trial counts for one candidate to be ranked.
#[derive(Debug, Clone, Copy)]
pub struct RankingItem {
    /// Count of successful outcomes.
    pub successes: u64,
    /// Count of all outcomes.
    pub trials: u64,
}

impl RankingItem {
    /// Builds an item from raw counts. The counts are stored as given.
    #[must_use]
    pub fn new(successes: u64, trials: u64) -> Self {
        RankingItem { successes, trials }
    }

    /// Scores this item with `wilson_lower_bound` at critical value `z`.
    #[must_use]
    pub fn wilson_score(&self, z: f64) -> f64 {
        let RankingItem { successes, trials } = *self;
        wilson_lower_bound(successes, trials, z)
    }
}
/// Ranks items by their Wilson score, best first.
///
/// # Arguments
///
/// * `items` - candidates to rank.
/// * `z` - critical value forwarded to `RankingItem::wilson_score`.
///
/// # Returns
///
/// Indices into `items`, ordered by descending score. Items with equal scores
/// keep their input order (the sort is stable). Items whose score is NaN are
/// placed last.
#[must_use]
pub fn rank_by_wilson_score(items: &[RankingItem], z: f64) -> Vec<usize> {
    let mut scored: Vec<(usize, f64)> = items
        .iter()
        .enumerate()
        .map(|(index, item)| {
            let score = item.wilson_score(z);
            // NaN compares as "equal" to everything under
            // `partial_cmp(..).unwrap_or(Equal)`, which breaks the total order
            // `sort_by` requires. Map it to the lowest possible key instead.
            let key = if score.is_nan() {
                f64::NEG_INFINITY
            } else {
                score
            };
            (index, key)
        })
        .collect();
    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    scored.into_iter().map(|(index, _)| index).collect()
}
/// Relative weights of the five components blended into a ranking score.
#[derive(Debug, Clone)]
pub struct RankingWeights {
    /// Weight applied to the relevance component.
    pub relevance: f64,
    /// Weight applied to the recency component.
    pub recency: f64,
    /// Weight applied to the success component.
    pub success: f64,
    /// Weight applied to the completeness component.
    pub completeness: f64,
    /// Weight applied to the field-importance component.
    pub field_importance: f64,
}

impl Default for RankingWeights {
    /// Default blend: 40% relevance, 20% recency, 20% success,
    /// 10% completeness, 10% field importance.
    fn default() -> Self {
        Self {
            relevance: 0.40,
            recency: 0.20,
            success: 0.20,
            completeness: 0.10,
            field_importance: 0.10,
        }
    }
}

impl RankingWeights {
    /// Builds a weight set from explicit values.
    ///
    /// No validation happens here; call [`RankingWeights::validate`] to check
    /// the result.
    #[must_use]
    pub fn new(
        relevance: f64,
        recency: f64,
        success: f64,
        completeness: f64,
        field_importance: f64,
    ) -> Self {
        Self {
            relevance,
            recency,
            success,
            completeness,
            field_importance,
        }
    }

    /// Checks that the weights sum to 1.0 within a tolerance of 0.01.
    ///
    /// Individual weights are not range-checked.
    ///
    /// # Errors
    ///
    /// Returns a message when the sum is NaN or infinite, or when it differs
    /// from 1.0 by more than 0.01.
    pub fn validate(&self) -> Result<(), String> {
        let sum = self.relevance
            + self.recency
            + self.success
            + self.completeness
            + self.field_importance;
        // A NaN sum would slip through the tolerance check below because every
        // comparison against NaN is false, so reject it explicitly.
        if !sum.is_finite() {
            return Err(format!("Weights must be finite numbers, got sum {}", sum));
        }
        let difference = (sum - 1.0).abs();
        if difference > 0.01 {
            return Err(format!(
                "Weights should sum to 1.0, got {} (difference: {})",
                sum, difference
            ));
        }
        Ok(())
    }
}
/// Converts a raw similarity into a relevance score for the given search mode.
///
/// * `Exact` - `1.0` when `similarity >= 1.0`, otherwise `0.0`.
/// * `Fuzzy { threshold }` - `0.0` below the threshold; at or above it, the
///   similarity is rescaled linearly so that the threshold maps to `0.0` and a
///   similarity of `1.0` maps to `1.0`. When the threshold is `1.0` or higher
///   there is no range left to rescale, so any similarity meeting it scores
///   `1.0`.
/// * `Regex` - a fixed `0.9`.
#[must_use]
pub fn calculate_relevance_score(mode: &SearchMode, similarity: f64) -> f64 {
    match mode {
        SearchMode::Exact => {
            if similarity >= 1.0 {
                1.0
            } else {
                0.0
            }
        }
        SearchMode::Fuzzy { threshold } => {
            let threshold = *threshold;
            if similarity >= threshold {
                let headroom = 1.0 - threshold;
                if headroom > 0.0 {
                    (similarity - threshold) / headroom
                } else {
                    // threshold >= 1.0: dividing would produce NaN (0/0) or a
                    // sign-flipped value, so treat the match as fully relevant.
                    1.0
                }
            } else {
                0.0
            }
        }
        SearchMode::Regex => 0.9,
    }
}
/// Exponential-decay recency score with a 30-day half-life.
///
/// The age is measured in whole days between `start_time` and `now`. An age of
/// zero scores `1.0`, and every additional 30 days halves the score. A
/// `start_time` later than `now` is treated as age zero, so the result never
/// exceeds `1.0`.
#[must_use]
pub fn calculate_recency_score(start_time: DateTime<Utc>, now: DateTime<Utc>) -> f64 {
    const HALF_LIFE_DAYS: f64 = 30.0;
    // Clock skew or bad data can put `start_time` after `now`. A negative age
    // would push the score above 1.0, so clamp it to zero.
    let age_days = (now - start_time).num_days().max(0) as f64;
    0.5_f64.powf(age_days / HALF_LIFE_DAYS)
}
/// Maps an episode's recorded outcome to a score.
///
/// `Success` scores `1.0`, `PartialSuccess` `0.6`, `Failure` `0.2`, and an
/// episode with no outcome scores `0.5`.
#[must_use]
pub fn calculate_success_score(episode: &Episode) -> f64 {
    match &episode.outcome {
        None => 0.5,
        Some(crate::types::TaskOutcome::Success { .. }) => 1.0,
        Some(crate::types::TaskOutcome::PartialSuccess { .. }) => 0.6,
        Some(crate::types::TaskOutcome::Failure { .. }) => 0.2,
    }
}
/// Scores how complete an episode is.
///
/// A complete episode scores `1.0`. Otherwise each recorded step earns `0.1`,
/// capped at `0.8`.
#[must_use]
pub fn calculate_completeness_score(episode: &Episode) -> f64 {
    if episode.is_complete() {
        return 1.0;
    }
    let step_credit = episode.steps.len() as f64 * 0.1;
    step_credit.min(0.8)
}
/// Returns the importance score of the field in which a match was found.
///
/// This simply forwards to `field.weight()`; the possible values are defined
/// by that method.
#[must_use]
pub fn calculate_field_importance_score(field: &SearchField) -> f64 {
field.weight()
}
/// Blends the five component scores of a match into one ranking score.
///
/// Each component (relevance, recency, success, completeness, field
/// importance) is multiplied by its entry in `weights`, and the products are
/// summed. Recency is measured against the current time from `Utc::now()`, so
/// the result for the same episode changes over time.
#[must_use]
pub fn calculate_ranking_score(
    episode: &Episode,
    mode: &SearchMode,
    similarity: f64,
    matched_field: &SearchField,
    weights: &RankingWeights,
) -> f64 {
    let current_time = Utc::now();

    let weighted_relevance = weights.relevance * calculate_relevance_score(mode, similarity);
    let weighted_recency =
        weights.recency * calculate_recency_score(episode.start_time, current_time);
    let weighted_success = weights.success * calculate_success_score(episode);
    let weighted_completeness = weights.completeness * calculate_completeness_score(episode);
    let weighted_field = weights.field_importance * calculate_field_importance_score(matched_field);

    // Summed left to right in the same order as the component list above.
    weighted_relevance
        + weighted_recency
        + weighted_success
        + weighted_completeness
        + weighted_field
}
/// Sorts search results by descending score.
///
/// Results with equal scores keep their input order (the sort is stable).
/// Results whose score is NaN are placed after all others.
#[must_use]
pub fn rank_search_results<T>(mut results: Vec<SearchResult<T>>) -> Vec<SearchResult<T>> {
    results.sort_by(|a, b| {
        // `partial_cmp(..).unwrap_or(Equal)` alone makes NaN "equal" to every
        // score, which is not a total order; `sort_by` may then produce an
        // unspecified order or panic. Give NaN an explicit position instead.
        match (a.score.is_nan(), b.score.is_nan()) {
            (false, false) => b
                .score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal),
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
        }
    });
    results
}
#[cfg(test)]
mod tests;