use std::collections::HashMap;
/// Outcome of one validation rule applied to a tile.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationCheck {
/// Name of the rule that produced this entry (e.g. `"confidence"`); this is
/// the key `ValidationResult::get_check` matches against.
pub name: String,
/// Whether the tile satisfied the rule.
pub passed: bool,
/// Numeric score of the rule; `TileValidator::validate` averages the scores
/// of all checks into `ValidationResult::overall_score`.
pub score: f64,
/// Human-readable explanation of the outcome.
pub reason: String,
}
/// Aggregated outcome of validating one tile.
#[derive(Debug, Clone)]
pub struct ValidationResult {
/// The individual checks, in the order `TileValidator::validate` ran them.
pub checks: Vec<ValidationCheck>,
/// Unweighted mean of the `score` of every entry in `checks`.
pub overall_score: f64,
/// `true` when `overall_score` reached the configured
/// `acceptance_threshold`. Individual `passed` flags do not feed into this.
pub accepted: bool,
}
impl ValidationResult {
    /// Returns how many of the recorded checks passed.
    pub fn pass_count(&self) -> usize {
        let mut passed = 0;
        for check in &self.checks {
            if check.passed {
                passed += 1;
            }
        }
        passed
    }

    /// Returns how many of the recorded checks did not pass.
    pub fn fail_count(&self) -> usize {
        // Every check either passed or failed, so the failures are the remainder.
        self.checks.len() - self.pass_count()
    }

    /// Looks up a check by its `name`.
    ///
    /// Returns the first check whose name equals `name`, or `None` when no
    /// check carries that name.
    pub fn get_check(&self, name: &str) -> Option<&ValidationCheck> {
        for check in &self.checks {
            if check.name == name {
                return Some(check);
            }
        }
        None
    }
}
/// A tile submitted for validation.
#[derive(Debug, Clone)]
pub struct TileInput {
/// Identifier of the tile. Not read by any check in this file.
pub id: String,
/// Text of the tile; drives the content-length and similarity checks.
pub content: String,
/// Confidence value compared against `ValidationConfig::min_confidence`.
pub confidence: f64,
/// Domain label; must be non-blank and at most 100 bytes after trimming.
pub domain: String,
/// Not read by any check in this file.
/// NOTE(review): presumably a creation timestamp — confirm units with callers.
pub created_at: u64,
/// Compared directly against `ValidationConfig::freshness_window_secs`, i.e.
/// the freshness check treats this value as an age in seconds.
/// NOTE(review): the name suggests a timestamp — confirm what callers store here.
pub refreshed_at: u64,
/// Number of recorded uses; `0` makes the usage-quality check pass
/// unconditionally as a "new tile".
pub usage_count: u64,
/// Success rate compared against `ValidationConfig::min_success_rate`.
pub success_rate: f64,
/// Tags attached to the tile. Not read by any check in this file.
pub tags: Vec<String>,
}
/// Thresholds that drive `TileValidator`'s checks.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
/// Smallest `TileInput::confidence` that passes the confidence check.
pub min_confidence: f64,
/// Smallest accepted content length, measured with `String::len` (bytes).
pub min_content_length: usize,
/// Largest accepted content length, measured with `String::len` (bytes).
pub max_content_length: usize,
/// Largest `TileInput::refreshed_at` value that still counts as fresh.
pub freshness_window_secs: u64,
/// Smallest success rate accepted for tiles that have been used at least once.
pub min_success_rate: f64,
/// Smallest usage count accepted for tiles that have been used at least once.
pub min_usage_count: u64,
/// The similarity check fails once the highest Jaccard similarity to a
/// registered tile reaches this value.
pub similarity_threshold: f64,
/// Minimum mean check score required for a tile to be accepted.
pub acceptance_threshold: f64,
}
impl Default for ValidationConfig {
fn default() -> Self {
Self {
min_confidence: 0.3,
min_content_length: 10,
max_content_length: 100_000,
freshness_window_secs: 7 * 24 * 3600, min_success_rate: 0.0, min_usage_count: 0,
similarity_threshold: 0.9,
acceptance_threshold: 0.6,
}
}
}
/// Validates `TileInput`s against a `ValidationConfig` and a set of
/// previously registered tile contents.
pub struct TileValidator {
// Thresholds applied by every check.
config: ValidationConfig,
// `(id, content)` pairs added through `register_existing`; the similarity
// check compares new content against each of them.
existing_contents: Vec<(String, String)>, }
impl TileValidator {
    /// Creates a validator that applies `config` and has no registered
    /// existing tiles yet.
    pub fn new(config: ValidationConfig) -> Self {
        Self {
            config,
            existing_contents: Vec::new(),
        }
    }

    /// Creates a validator that uses `ValidationConfig::default()`.
    pub fn with_defaults() -> Self {
        Self::new(ValidationConfig::default())
    }

    /// Records an already-known tile so later `validate` calls compare new
    /// content against it in the similarity check.
    pub fn register_existing(&mut self, id: &str, content: &str) {
        self.existing_contents
            .push((id.to_string(), content.to_string()));
    }

    /// Runs every check against `tile` and aggregates the outcome.
    ///
    /// `overall_score` is the unweighted mean of the six check scores and the
    /// tile is accepted when that mean reaches `acceptance_threshold`. A
    /// single failed check therefore does not reject a tile on its own.
    pub fn validate(&self, tile: &TileInput) -> ValidationResult {
        let checks = vec![
            self.check_confidence(tile),
            self.check_content_length(tile),
            self.check_freshness(tile),
            self.check_usage_quality(tile),
            self.check_domain_format(tile),
            self.check_similar_existing(tile),
        ];
        // `checks` is a fixed, non-empty list, so the mean is always defined.
        let total: f64 = checks.iter().map(|c| c.score).sum();
        let overall_score = total / checks.len() as f64;
        let accepted = overall_score >= self.config.acceptance_threshold;
        ValidationResult {
            checks,
            overall_score,
            accepted,
        }
    }

    /// Passes when the tile's confidence reaches `min_confidence`.
    ///
    /// A failing tile scores the fraction of the minimum it achieved, kept
    /// within `[0.0, 1.0]` so negative or NaN confidences cannot drag the
    /// overall mean out of range.
    fn check_confidence(&self, tile: &TileInput) -> ValidationCheck {
        let min = self.config.min_confidence;
        let passed = tile.confidence >= min;
        let score = if passed {
            1.0
        } else if min > 0.0 {
            // `f64::max` returns the non-NaN operand, so a NaN ratio becomes 0.0.
            (tile.confidence / min).max(0.0).min(1.0)
        } else {
            // A non-positive (or NaN) minimum gives no meaningful ratio.
            0.0
        };
        let relation = if passed { ">=" } else { "<" };
        ValidationCheck {
            name: "confidence".to_string(),
            passed,
            score,
            reason: format!("confidence {:.2} {} {:.2}", tile.confidence, relation, min),
        }
    }

    /// Passes when the content length (in bytes, per `String::len`) lies
    /// within `[min_content_length, max_content_length]`.
    ///
    /// Too-short content scores proportionally to how close it is to the
    /// minimum; too-long content scores `0.0`.
    fn check_content_length(&self, tile: &TileInput) -> ValidationCheck {
        let len = tile.content.len();
        let min = self.config.min_content_length;
        let max = self.config.max_content_length;
        let too_short = len < min;
        let too_long = len > max;
        let passed = !(too_short || too_long);
        let score = if passed {
            1.0
        } else if too_short {
            // `too_short` implies `min >= 1`, so the division is safe and < 1.
            len as f64 / min as f64
        } else {
            0.0
        };
        ValidationCheck {
            name: "content_length".to_string(),
            passed,
            score,
            reason: format!("content length {} (min: {}, max: {})", len, min, max),
        }
    }

    /// Passes when `refreshed_at` does not exceed `freshness_window_secs`;
    /// otherwise the score decays as `window / age`.
    ///
    /// NOTE(review): `refreshed_at` is compared to the window directly, i.e.
    /// it is treated as an age in seconds rather than a timestamp. Confirm
    /// that callers populate it that way.
    fn check_freshness(&self, tile: &TileInput) -> ValidationCheck {
        let age = tile.refreshed_at;
        let window = self.config.freshness_window_secs;
        let within_window = age <= window;
        let score = if within_window {
            1.0
        } else {
            // `max(1)` guards the division; the ratio is < 1 whenever age > window.
            (window as f64 / age.max(1) as f64).min(1.0)
        };
        ValidationCheck {
            name: "freshness".to_string(),
            passed: within_window,
            score,
            reason: format!("age {}s (window: {}s)", age, window),
        }
    }

    /// Passes when a used tile meets both `min_usage_count` and
    /// `min_success_rate`. Tiles with no usage at all pass unconditionally.
    ///
    /// A failing tile scores half its success rate plus 0.5 if the usage
    /// count was sufficient, kept within `[0.0, 1.0]`.
    fn check_usage_quality(&self, tile: &TileInput) -> ValidationCheck {
        if tile.usage_count == 0 {
            return ValidationCheck {
                name: "usage_quality".to_string(),
                passed: true,
                score: 1.0,
                reason: "new tile, no usage data yet".to_string(),
            };
        }
        let usage_ok = tile.usage_count >= self.config.min_usage_count;
        let rate_ok = tile.success_rate >= self.config.min_success_rate;
        let passed = usage_ok && rate_ok;
        let score = if passed {
            1.0
        } else {
            let usage_bonus = if usage_ok { 0.5 } else { 0.0 };
            // `max` must run before `min`: both return the non-NaN operand, so
            // a NaN success rate collapses to 0.0 here instead of becoming 1.0.
            (tile.success_rate * 0.5 + usage_bonus).max(0.0).min(1.0)
        };
        ValidationCheck {
            name: "usage_quality".to_string(),
            passed,
            score,
            reason: format!(
                "usage {} (min: {}), rate {:.2}",
                tile.usage_count, self.config.min_usage_count, tile.success_rate
            ),
        }
    }

    /// Passes when the trimmed domain is non-empty and at most 100 bytes long.
    fn check_domain_format(&self, tile: &TileInput) -> ValidationCheck {
        let domain = tile.domain.trim();
        let passed = !domain.is_empty() && domain.len() <= 100;
        ValidationCheck {
            name: "domain_format".to_string(),
            passed,
            score: if passed { 1.0 } else { 0.0 },
            reason: format!("domain: '{}'", domain),
        }
    }

    /// Passes when no registered tile is at least `similarity_threshold`
    /// similar to `tile`, using case-insensitive word-set Jaccard similarity.
    ///
    /// The score is `1.0 - max_similarity`. The reason names the closest
    /// registered tile whenever at least one tile was compared, even if the
    /// best similarity is zero.
    fn check_similar_existing(&self, tile: &TileInput) -> ValidationCheck {
        // Lowercase the candidate once instead of once per registered tile.
        let candidate = tile.content.to_lowercase();

        // Closest registered tile so far; ties keep the earliest registration.
        let mut best: Option<(&str, f64)> = None;
        for (id, content) in &self.existing_contents {
            let sim = jaccard_similarity(&candidate, &content.to_lowercase());
            let improves = match best {
                Some((_, top)) => sim > top,
                None => true,
            };
            if improves {
                best = Some((id.as_str(), sim));
            }
        }

        let max_sim = best.map_or(0.0, |(_, sim)| sim);
        let reason = match best {
            Some((id, sim)) => format!(
                "max similarity {:.2} to '{}' (threshold: {:.2})",
                sim, id, self.config.similarity_threshold
            ),
            None => "no existing tiles to compare".to_string(),
        };
        ValidationCheck {
            name: "similarity".to_string(),
            passed: max_sim < self.config.similarity_threshold,
            score: 1.0 - max_sim,
            reason,
        }
    }
}
/// Jaccard similarity of the whitespace-separated word sets of `a` and `b`.
///
/// Returns `|A ∩ B| / |A ∪ B|`, with two special cases: `1.0` when both
/// inputs contain no words, and `0.0` when exactly one of them does. The
/// comparison is case-sensitive; callers lowercase beforehand if needed.
fn jaccard_similarity(a: &str, b: &str) -> f64 {
    use std::collections::HashSet;

    let left: HashSet<&str> = a.split_whitespace().collect();
    let right: HashSet<&str> = b.split_whitespace().collect();

    match (left.is_empty(), right.is_empty()) {
        (true, true) => 1.0,
        (true, false) | (false, true) => 0.0,
        (false, false) => {
            let shared = left.iter().filter(|word| right.contains(*word)).count();
            // Inclusion–exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|.
            let combined = left.len() + right.len() - shared;
            shared as f64 / combined as f64
        }
    }
}
// Unit tests for `TileValidator` and its individual checks.
#[cfg(test)]
mod tests {
use super::*;
// Builds a tile with the given content and confidence; every other field is
// fixed (domain "testing", refreshed_at 5000, usage_count 10, success_rate 0.9)
// so that it passes the remaining default checks.
fn make_tile(content: &str, confidence: f64) -> TileInput {
TileInput {
id: "t1".to_string(),
content: content.to_string(),
confidence,
domain: "testing".to_string(),
created_at: 1000,
refreshed_at: 5000,
usage_count: 10,
success_rate: 0.9,
tags: vec!["test".to_string()],
}
}
// A tile that satisfies every default check is accepted.
#[test]
fn test_accepts_good_tile() {
let v = TileValidator::with_defaults();
let tile = make_tile("This is a valid knowledge tile with sufficient content length.", 0.8);
let result = v.validate(&tile);
assert!(result.accepted);
assert!(result.overall_score >= 0.6);
}
// Confidence 0.1 is below the default minimum of 0.3. Only the individual
// check is asserted, not the overall acceptance.
#[test]
fn test_rejects_low_confidence() {
let v = TileValidator::with_defaults();
let tile = make_tile("Valid content here.", 0.1);
let result = v.validate(&tile);
let conf = result.get_check("confidence").unwrap();
assert!(!conf.passed);
}
// "too" is 3 bytes, below the default minimum length of 10.
#[test]
fn test_rejects_short_content() {
let v = TileValidator::with_defaults();
let tile = make_tile("too", 0.8);
let result = v.validate(&tile);
let len_check = result.get_check("content_length").unwrap();
assert!(!len_check.passed);
}
// The new content adds a single unseen word to the registered text, giving a
// Jaccard similarity of 16/17, which is above the default 0.9 threshold.
#[test]
fn test_detects_similar_existing() {
let mut v = TileValidator::with_defaults();
v.register_existing("existing", "Rust is a systems programming language focused on safety speed and performance with zero cost abstractions");
let tile = make_tile("Rust is a systems programming language focused on safety speed and performance with zero cost abstractions and concurrency", 0.8);
let result = v.validate(&tile);
let sim = result.get_check("similarity").unwrap();
assert!(!sim.passed, "similarity should be above threshold");
}
// A usage_count of 0 short-circuits the usage-quality check to a pass,
// regardless of the success rate.
#[test]
fn test_new_tile_passes_usage_quality() {
let v = TileValidator::with_defaults();
let mut tile = make_tile("Valid content for a brand new tile.", 0.8);
tile.usage_count = 0;
tile.success_rate = 0.0;
let result = v.validate(&tile);
let uq = result.get_check("usage_quality").unwrap();
assert!(uq.passed);
}
// `validate` always reports all six checks under their expected names.
#[test]
fn test_all_checks_run() {
let v = TileValidator::with_defaults();
let tile = make_tile("Content for testing all checks.", 0.5);
let result = v.validate(&tile);
assert_eq!(result.checks.len(), 6);
assert!(result.get_check("confidence").is_some());
assert!(result.get_check("content_length").is_some());
assert!(result.get_check("freshness").is_some());
assert!(result.get_check("usage_quality").is_some());
assert!(result.get_check("domain_format").is_some());
assert!(result.get_check("similarity").is_some());
}
// With stricter thresholds the tile misses both the confidence and the
// content-length minimum, so the mean score stays below the 0.95 needed.
#[test]
fn test_custom_config() {
let config = ValidationConfig {
min_confidence: 0.9,
min_content_length: 50,
acceptance_threshold: 0.95,
..ValidationConfig::default()
};
let v = TileValidator::new(config);
let tile = make_tile("Short.", 0.8);
let result = v.validate(&tile);
assert!(!result.accepted);
}
// An empty domain fails the domain-format check.
#[test]
fn test_empty_domain_fails() {
let v = TileValidator::with_defaults();
let mut tile = make_tile("Valid content with enough length.", 0.8);
tile.domain = "".to_string();
let result = v.validate(&tile);
let dom = result.get_check("domain_format").unwrap();
assert!(!dom.passed);
}
// Pass and fail counts partition the checks; the low confidence guarantees
// at least one failure.
#[test]
fn test_pass_fail_counts() {
let v = TileValidator::with_defaults();
let tile = make_tile("Valid content.", 0.1); let result = v.validate(&tile);
assert_eq!(result.pass_count() + result.fail_count(), result.checks.len());
assert!(result.fail_count() >= 1);
}
// refreshed_at (500) exceeds the 100-second window, so the freshness check
// fails with a score of window / age, which is below 1.
#[test]
fn test_freshness_decay() {
let config = ValidationConfig {
freshness_window_secs: 100,
..ValidationConfig::default()
};
let v = TileValidator::new(config);
let mut tile = make_tile("Valid content for freshness test.", 0.8);
tile.refreshed_at = 500; let result = v.validate(&tile);
let fresh = result.get_check("freshness").unwrap();
assert!(!fresh.passed);
assert!(fresh.score < 1.0);
}
// Despite the name, this asserts a PASS: the default min_success_rate is 0.0
// and min_usage_count is 0, so a heavily used tile with a 0.2 success rate
// still satisfies the usage-quality check. Nothing here makes the tile stale.
#[test]
fn test_stale_tile_low_usage_rate() {
let v = TileValidator::with_defaults();
let mut tile = make_tile("Valid content here.", 0.8);
tile.usage_count = 100;
tile.success_rate = 0.2;
let result = v.validate(&tile);
let uq = result.get_check("usage_quality").unwrap();
assert!(uq.passed);
}
}