use super::{SufficiencyChecker, SufficiencyLevel};
use crate::config::SufficiencyConfig;
/// Token and length thresholds consulted by [`ThresholdChecker`].
///
/// The values are plain counts; `ThresholdChecker::check` decides how each
/// one influences the verdict.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ThresholdConfig {
    /// Lowest token count at which content can still be rated partially
    /// sufficient (and only if its quality score is high enough).
    pub min_tokens: usize,
    /// Token count from which content is rated at least partially sufficient.
    pub target_tokens: usize,
    /// Token count from which content is rated sufficient without a quality check.
    pub max_tokens: usize,
    /// Minimum content length in bytes; shorter content is always insufficient.
    pub min_content_length: usize,
}
impl Default for ThresholdConfig {
fn default() -> Self {
Self::from_config(&SufficiencyConfig::default())
}
}
impl ThresholdConfig {
pub fn from_config(config: &SufficiencyConfig) -> Self {
Self {
min_tokens: config.min_tokens,
target_tokens: config.target_tokens,
max_tokens: config.max_tokens,
min_content_length: config.min_content_length,
}
}
}
/// Sufficiency checker driven by the thresholds in a [`ThresholdConfig`].
#[derive(Debug, Clone)]
pub struct ThresholdChecker {
    /// Thresholds consulted on every check.
    config: ThresholdConfig,
}
impl ThresholdChecker {
pub fn new() -> Self {
Self {
config: ThresholdConfig::default(),
}
}
pub fn with_config(config: ThresholdConfig) -> Self {
Self { config }
}
fn estimate_tokens(&self, content: &str) -> usize {
content.len() / 4
}
fn check_quality(&self, content: &str) -> f32 {
let mut score = 0.0;
let sentence_endings = content.matches('.').count()
+ content.matches('?').count()
+ content.matches('!').count();
score += (sentence_endings as f32 * 0.05).min(0.3);
let paragraphs = content.matches("\n\n").count();
score += (paragraphs as f32 * 0.1).min(0.3);
if content.contains(':') || content.contains('-') {
score += 0.1;
}
let words: Vec<&str> = content.split_whitespace().collect();
if words.len() > 10 {
let unique_ratio = words.iter().collect::<std::collections::HashSet<_>>().len() as f32
/ words.len() as f32;
score += unique_ratio * 0.3;
}
score.min(1.0)
}
}
impl Default for ThresholdChecker {
fn default() -> Self {
Self::new()
}
}
impl SufficiencyChecker for ThresholdChecker {
    /// Rates `content` by comparing its size against the configured thresholds.
    ///
    /// A `token_count` of 0 is replaced by `estimate_tokens(content)`. The
    /// query is not used by this checker, hence the `_query` name.
    ///
    /// Verdicts, first match wins:
    /// 1. `content.len() < min_content_length` gives `Insufficient`.
    /// 2. tokens `>= max_tokens` gives `Sufficient`.
    /// 3. tokens `>= target_tokens` gives `Sufficient` if the quality score is
    ///    above 0.5, otherwise `PartialSufficient`.
    /// 4. tokens `>= min_tokens` with a quality score above 0.7 gives
    ///    `PartialSufficient`.
    /// 5. Anything else gives `Insufficient`.
    fn check(&self, _query: &str, content: &str, token_count: usize) -> SufficiencyLevel {
        let cfg = &self.config;

        // Too-short content is rejected regardless of the token count.
        if content.len() < cfg.min_content_length {
            return SufficiencyLevel::Insufficient;
        }

        let tokens = match token_count {
            0 => self.estimate_tokens(content),
            known => known,
        };

        if tokens >= cfg.max_tokens {
            return SufficiencyLevel::Sufficient;
        }

        if tokens >= cfg.target_tokens {
            if self.check_quality(content) > 0.5 {
                SufficiencyLevel::Sufficient
            } else {
                SufficiencyLevel::PartialSufficient
            }
        } else if tokens >= cfg.min_tokens && self.check_quality(content) > 0.7 {
            // Below target, only high-quality content earns a partial rating.
            SufficiencyLevel::PartialSufficient
        } else {
            SufficiencyLevel::Insufficient
        }
    }

    /// Identifier of this checker: always `"threshold"`.
    fn name(&self) -> &'static str {
        "threshold"
    }
}