use crate::utils::current_timestamp;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
pub fn jaro_winkler_similarity(s1: &str, s2: &str) -> f32 {
if s1 == s2 {
return 1.0;
}
if s1.is_empty() || s2.is_empty() {
return 0.0;
}
let jaro = jaro_similarity(s1, s2);
let prefix_len = s1
.chars()
.zip(s2.chars())
.take_while(|(a, b)| a == b)
.take(4)
.count();
jaro + (prefix_len as f32 * 0.1 * (1.0 - jaro))
}
fn jaro_similarity(s1: &str, s2: &str) -> f32 {
let s1c: SmallVec<[char; 64]> = s1.chars().collect();
let s2c: SmallVec<[char; 64]> = s2.chars().collect();
let len1 = s1c.len();
let len2 = s2c.len();
if len1 == 0 && len2 == 0 {
return 1.0;
}
if len1 == 0 || len2 == 0 {
return 0.0;
}
let window = (len1.max(len2) / 2).saturating_sub(1);
let mut s1_matched = SmallVec::<[bool; 64]>::from_elem(false, len1);
let mut s2_matched = SmallVec::<[bool; 64]>::from_elem(false, len2);
let mut matches = 0usize;
for (i, &c1) in s1c.iter().enumerate() {
let lo = i.saturating_sub(window);
let hi = (i + window + 1).min(len2);
for j in lo..hi {
if !s2_matched[j] && c1 == s2c[j] {
s1_matched[i] = true;
s2_matched[j] = true;
matches += 1;
break;
}
}
}
if matches == 0 {
return 0.0;
}
let transpositions = s1c
.iter()
.enumerate()
.filter(|&(i, _)| s1_matched[i])
.zip(s2c.iter().enumerate().filter(|&(i, _)| s2_matched[i]))
.filter(|&((_, &a), (_, &b))| a != b)
.count()
/ 2;
let m = matches as f32;
(m / len1 as f32 + m / len2 as f32 + (m - transpositions as f32) / m) / 3.0
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeDecayedScore {
pub base_score: f32,
pub age_seconds: u64,
pub decay_lambda: f32,
pub decayed_score: f32,
}
impl TimeDecayedScore {
pub fn calculate(base_score: f32, timestamp: u64) -> Self {
const DEFAULT_LAMBDA: f32 = 0.1;
let now = current_timestamp();
let age_seconds = now.saturating_sub(timestamp);
let age_hours = age_seconds as f32 / 3600.0;
let decayed_score = (base_score * (-DEFAULT_LAMBDA * age_hours).exp()).clamp(0.0, 1.0);
Self {
base_score,
age_seconds,
decay_lambda: DEFAULT_LAMBDA,
decayed_score,
}
}
pub fn with_decay(mut self, lambda: f32) -> Self {
let age_hours = self.age_seconds as f32 / 3600.0;
self.decayed_score = (self.base_score * (-lambda * age_hours).exp()).clamp(0.0, 1.0);
self.decay_lambda = lambda;
self
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum PatternState {
Single,
Duplicate,
Loop,
NearLoop,
RefinementChain,
Convergence,
Degradation,
}
pub fn detect_pattern(
history: &[(String, String, f32)], window_size: usize,
) -> PatternState {
if history.is_empty() {
return PatternState::Single;
}
let start = history.len().saturating_sub(window_size);
let recent = &history[start..];
if recent.len() < 2 {
return PatternState::Single;
}
let first = &recent[0];
if recent.iter().all(|r| r.0 == first.0 && r.1 == first.1) {
return if recent.len() >= 3 {
PatternState::Loop
} else {
PatternState::Duplicate
};
}
let mut qualities = SmallVec::<[f32; 32]>::with_capacity(recent.len());
let mut same_tool = true;
let mut multi_tool = false;
for r in recent {
qualities.push(r.2);
if r.0 != first.0 {
same_tool = false;
multi_tool = true;
}
}
if qualities.len() >= 3 {
if qualities.windows(2).all(|w| w[1] > w[0] + 0.05) {
return PatternState::RefinementChain;
}
if qualities.windows(2).all(|w| w[1] < w[0] - 0.05) {
return PatternState::Degradation;
}
}
if same_tool
&& recent.len() >= 3
&& recent
.windows(2)
.all(|w| jaro_winkler_similarity(&w[0].1, &w[1].1) > 0.85)
{
return PatternState::NearLoop;
}
if multi_tool {
let n = qualities.len() as f32;
let avg = qualities.iter().sum::<f32>() / n;
if qualities.iter().all(|&q| (q - avg).abs() < 0.1) {
return PatternState::Convergence;
}
}
PatternState::Single
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MLScoreComponents {
pub success_rate: f32,
pub avg_execution_time: f32,
pub result_quality: f32,
pub failure_count: usize,
pub age_hours: f32,
pub frequency: f32,
pub confidence: f32,
}
impl MLScoreComponents {
pub fn raw_score(&self) -> f32 {
(self.success_rate * 0.40)
+ (self.result_quality * 0.30)
+ ((10_000.0 - self.avg_execution_time).max(0.0) / 10_000.0 * 0.15)
+ (self.frequency.min(1.0) * 0.15)
}
pub fn with_age_decay(mut self) -> Self {
self.confidence = (self.confidence * (-self.age_hours / 168.0).exp()).max(0.1);
self
}
pub fn to_feature_vector(&self) -> [f32; 7] {
[
self.success_rate,
self.avg_execution_time / 10_000.0,
self.result_quality,
(self.failure_count as f32).min(10.0) / 10.0,
self.age_hours / 168.0,
self.frequency,
self.confidence,
]
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_jaro_winkler_exact() {
assert_eq!(jaro_winkler_similarity("hello", "hello"), 1.0);
}
#[test]
fn test_jaro_winkler_partial() {
let sim = jaro_winkler_similarity("pattern", "pattern_file");
assert!(sim > 0.85 && sim < 1.0, "sim={sim}");
}
#[test]
fn test_jaro_winkler_prefix_boost() {
let with_prefix = jaro_winkler_similarity("test_one", "test_two");
let without = jaro_winkler_similarity("one_test", "two_test");
assert!(with_prefix > without, "prefix boost should be applied");
}
#[test]
fn test_time_decay_ordering() {
let now = current_timestamp();
let recent = TimeDecayedScore::calculate(0.9, now);
let old = TimeDecayedScore::calculate(0.9, now.saturating_sub(7 * 24 * 3600));
assert!(old.decayed_score < recent.decayed_score);
}
#[test]
fn test_detect_pattern_loop() {
let history = vec![
("grep".to_string(), "pattern1".to_string(), 0.5),
("grep".to_string(), "pattern1".to_string(), 0.5),
("grep".to_string(), "pattern1".to_string(), 0.5),
];
assert_eq!(detect_pattern(&history, 10), PatternState::Loop);
}
#[test]
fn test_detect_pattern_refinement() {
let history = vec![
("grep".to_string(), "pat1".to_string(), 0.3),
("grep".to_string(), "pat2".to_string(), 0.5),
("grep".to_string(), "pat3".to_string(), 0.8),
];
assert_eq!(detect_pattern(&history, 10), PatternState::RefinementChain);
}
#[test]
fn test_detect_pattern_near_loop_requires_three_entries() {
let two_entries = vec![
("grep".to_string(), "pattern-one".to_string(), 0.4),
("grep".to_string(), "pattern-two".to_string(), 0.45),
];
assert_eq!(detect_pattern(&two_entries, 10), PatternState::Single);
let three_entries = vec![
("grep".to_string(), "pattern-one".to_string(), 0.4),
("grep".to_string(), "pattern-two".to_string(), 0.45),
("grep".to_string(), "pattern-three".to_string(), 0.5),
];
assert_eq!(detect_pattern(&three_entries, 10), PatternState::NearLoop);
}
#[test]
fn test_ml_raw_score() {
let c = MLScoreComponents {
success_rate: 0.9,
avg_execution_time: 100.0,
result_quality: 0.85,
failure_count: 1,
age_hours: 2.0,
frequency: 0.5,
confidence: 0.9,
};
let score = c.raw_score();
assert!(score > 0.7 && score < 1.0, "score={score}");
}
#[test]
fn test_ml_feature_vector_length() {
let c = MLScoreComponents {
success_rate: 0.9,
avg_execution_time: 100.0,
result_quality: 0.85,
failure_count: 1,
age_hours: 2.0,
frequency: 0.5,
confidence: 0.9,
};
assert_eq!(c.to_feature_vector().len(), 7);
}
}