1use std::collections::HashMap;
12
/// Outcome of one validation rule applied to a tile.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationCheck {
    /// Identifier of the rule that produced this outcome (e.g. `"confidence"`).
    pub name: String,
    /// Whether the rule's pass condition held.
    pub passed: bool,
    /// Numeric quality score for this rule; `1.0` is what the validator
    /// assigns to a fully satisfied rule.
    pub score: f64,
    /// Human-readable explanation of how the outcome was reached.
    pub reason: String,
}
21
22#[derive(Debug, Clone)]
24pub struct ValidationResult {
25 pub checks: Vec<ValidationCheck>,
26 pub overall_score: f64,
27 pub accepted: bool,
28}
29
30impl ValidationResult {
31 pub fn pass_count(&self) -> usize { self.checks.iter().filter(|c| c.passed).count() }
32 pub fn fail_count(&self) -> usize { self.checks.iter().filter(|c| !c.passed).count() }
33
34 pub fn get_check(&self, name: &str) -> Option<&ValidationCheck> {
35 self.checks.iter().find(|c| c.name == name)
36 }
37}
38
/// A candidate tile as presented to the validator.
#[derive(Debug, Clone)]
pub struct TileInput {
    /// Identifier of the tile.
    pub id: String,
    /// Text payload; its byte length and word overlap with registered tiles are checked.
    pub content: String,
    /// Confidence value compared against `ValidationConfig::min_confidence`.
    pub confidence: f64,
    /// Domain label; must be non-blank and at most 100 bytes after trimming to pass.
    pub domain: String,
    /// Creation time. NOTE(review): unit/epoch not visible in this file — confirm.
    pub created_at: u64,
    /// Compared directly against `ValidationConfig::freshness_window_secs` by the
    /// freshness check, i.e. treated there as an age in seconds.
    /// NOTE(review): the name suggests a timestamp — confirm which is intended.
    pub refreshed_at: u64,
    /// How many times the tile has been used; `0` marks a tile with no usage data.
    pub usage_count: u64,
    /// Success rate compared against `ValidationConfig::min_success_rate`
    /// (presumably a fraction in `0.0..=1.0` — confirm).
    pub success_rate: f64,
    /// Free-form tags attached to the tile.
    pub tags: Vec<String>,
}
52
/// Thresholds that drive every validation rule.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Lowest tile confidence that still passes the confidence check.
    pub min_confidence: f64,
    /// Smallest accepted content length, in bytes (inclusive).
    pub min_content_length: usize,
    /// Largest accepted content length, in bytes (inclusive).
    pub max_content_length: usize,
    /// Largest age, in seconds, that still counts as fresh (inclusive).
    pub freshness_window_secs: u64,
    /// Lowest success rate that passes the usage-quality check.
    pub min_success_rate: f64,
    /// Lowest usage count that passes the usage-quality check
    /// (tiles with a usage count of zero are exempt).
    pub min_usage_count: u64,
    /// A tile whose highest word-level similarity to a registered tile reaches
    /// this value fails the similarity check.
    pub similarity_threshold: f64,
    /// Lowest overall score at which a tile is accepted.
    pub acceptance_threshold: f64,
}
65
66impl Default for ValidationConfig {
67 fn default() -> Self {
68 Self {
69 min_confidence: 0.3,
70 min_content_length: 10,
71 max_content_length: 100_000,
72 freshness_window_secs: 7 * 24 * 3600, min_success_rate: 0.0, min_usage_count: 0,
75 similarity_threshold: 0.9,
76 acceptance_threshold: 0.6,
77 }
78 }
79}
80
81pub struct TileValidator {
83 config: ValidationConfig,
84 existing_contents: Vec<(String, String)>, }
86
87impl TileValidator {
88 pub fn new(config: ValidationConfig) -> Self {
89 Self { config, existing_contents: Vec::new() }
90 }
91
92 pub fn with_defaults() -> Self {
93 Self::new(ValidationConfig::default())
94 }
95
96 pub fn register_existing(&mut self, id: &str, content: &str) {
98 self.existing_contents.push((id.to_string(), content.to_string()));
99 }
100
101 pub fn validate(&self, tile: &TileInput) -> ValidationResult {
103 let mut checks = Vec::new();
104
105 checks.push(self.check_confidence(tile));
106 checks.push(self.check_content_length(tile));
107 checks.push(self.check_freshness(tile));
108 checks.push(self.check_usage_quality(tile));
109 checks.push(self.check_domain_format(tile));
110 checks.push(self.check_similar_existing(tile));
111
112 let overall_score: f64 = if checks.is_empty() {
113 0.0
114 } else {
115 checks.iter().map(|c| c.score).sum::<f64>() / checks.len() as f64
116 };
117 let accepted = overall_score >= self.config.acceptance_threshold;
118
119 ValidationResult { checks, overall_score, accepted }
120 }
121
122 fn check_confidence(&self, tile: &TileInput) -> ValidationCheck {
124 let passed = tile.confidence >= self.config.min_confidence;
125 let score = if passed { 1.0 } else { tile.confidence / self.config.min_confidence };
126 ValidationCheck {
127 name: "confidence".to_string(),
128 passed,
129 score,
130 reason: if passed {
131 format!("confidence {:.2} >= {:.2}", tile.confidence, self.config.min_confidence)
132 } else {
133 format!("confidence {:.2} < {:.2}", tile.confidence, self.config.min_confidence)
134 },
135 }
136 }
137
138 fn check_content_length(&self, tile: &TileInput) -> ValidationCheck {
140 let len = tile.content.len();
141 let too_short = len < self.config.min_content_length;
142 let too_long = len > self.config.max_content_length;
143 let passed = !too_short && !too_long;
144 let score = if passed { 1.0 } else if too_short {
145 len as f64 / self.config.min_content_length as f64
146 } else {
147 0.0
148 };
149 ValidationCheck {
150 name: "content_length".to_string(),
151 passed,
152 score: score.min(1.0),
153 reason: format!("content length {} (min: {}, max: {})", len, self.config.min_content_length, self.config.max_content_length),
154 }
155 }
156
157 fn check_freshness(&self, tile: &TileInput) -> ValidationCheck {
159 let age = tile.refreshed_at;
161 let within_window = age <= self.config.freshness_window_secs;
162 let score = if within_window { 1.0 } else {
163 (self.config.freshness_window_secs as f64 / age.max(1) as f64).min(1.0)
164 };
165 ValidationCheck {
166 name: "freshness".to_string(),
167 passed: within_window,
168 score,
169 reason: format!("age {}s (window: {}s)", age, self.config.freshness_window_secs),
170 }
171 }
172
173 fn check_usage_quality(&self, tile: &TileInput) -> ValidationCheck {
175 if tile.usage_count == 0 {
177 return ValidationCheck {
178 name: "usage_quality".to_string(),
179 passed: true,
180 score: 1.0,
181 reason: "new tile, no usage data yet".to_string(),
182 };
183 }
184 let usage_ok = tile.usage_count >= self.config.min_usage_count;
185 let rate_ok = tile.success_rate >= self.config.min_success_rate;
186 let passed = usage_ok && rate_ok;
187 let score = if passed { 1.0 } else {
188 (tile.success_rate * 0.5 + if usage_ok { 0.5 } else { 0.0 }).min(1.0)
189 };
190 ValidationCheck {
191 name: "usage_quality".to_string(),
192 passed,
193 score,
194 reason: format!("usage {} (min: {}), rate {:.2}", tile.usage_count, self.config.min_usage_count, tile.success_rate),
195 }
196 }
197
198 fn check_domain_format(&self, tile: &TileInput) -> ValidationCheck {
200 let domain = tile.domain.trim();
201 let passed = !domain.is_empty() && domain.len() <= 100;
202 ValidationCheck {
203 name: "domain_format".to_string(),
204 passed,
205 score: if passed { 1.0 } else { 0.0 },
206 reason: format!("domain: '{}'", domain),
207 }
208 }
209
210 fn check_similar_existing(&self, tile: &TileInput) -> ValidationCheck {
212 let mut max_sim = 0.0_f64;
213 let mut most_similar = String::new();
214 for (id, content) in &self.existing_contents {
215 let sim = jaccard_similarity(&tile.content.to_lowercase(), &content.to_lowercase());
216 if sim > max_sim {
217 max_sim = sim;
218 most_similar = id.clone();
219 }
220 }
221 let passed = max_sim < self.config.similarity_threshold;
222 ValidationCheck {
223 name: "similarity".to_string(),
224 passed,
225 score: if self.existing_contents.is_empty() { 1.0 } else { 1.0 - max_sim },
226 reason: if most_similar.is_empty() {
227 "no existing tiles to compare".to_string()
228 } else {
229 format!("max similarity {:.2} to '{}' (threshold: {:.2})", max_sim, most_similar, self.config.similarity_threshold)
230 },
231 }
232 }
233}
234
/// Word-level Jaccard similarity of `a` and `b`: `|A ∩ B| / |A ∪ B|` over the
/// sets of whitespace-separated tokens.
///
/// The comparison is case-sensitive. Two inputs without any tokens count as
/// identical (`1.0`); if exactly one input has no tokens the result is `0.0`.
fn jaccard_similarity(a: &str, b: &str) -> f64 {
    let left: std::collections::HashSet<&str> = a.split_whitespace().collect();
    let right: std::collections::HashSet<&str> = b.split_whitespace().collect();

    match (left.is_empty(), right.is_empty()) {
        (true, true) => 1.0,
        (true, false) | (false, true) => 0.0,
        (false, false) => {
            let shared = left.intersection(&right).count();
            // |A ∪ B| = |A| + |B| - |A ∩ B|
            let combined = left.len() + right.len() - shared;
            shared as f64 / combined as f64
        }
    }
}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an otherwise well-formed tile; only content and confidence vary.
    fn make_tile(content: &str, confidence: f64) -> TileInput {
        TileInput {
            id: String::from("t1"),
            content: String::from(content),
            confidence,
            domain: String::from("testing"),
            created_at: 1000,
            refreshed_at: 5000,
            usage_count: 10,
            success_rate: 0.9,
            tags: vec![String::from("test")],
        }
    }

    /// A tile that satisfies every rule is accepted.
    #[test]
    fn test_accepts_good_tile() {
        let validator = TileValidator::with_defaults();
        let candidate =
            make_tile("This is a valid knowledge tile with sufficient content length.", 0.8);

        let outcome = validator.validate(&candidate);

        assert!(outcome.accepted);
        assert!(outcome.overall_score >= 0.6);
    }

    /// Confidence below the default minimum fails the confidence check.
    #[test]
    fn test_rejects_low_confidence() {
        let validator = TileValidator::with_defaults();
        let outcome = validator.validate(&make_tile("Valid content here.", 0.1));

        let confidence = outcome.get_check("confidence").unwrap();
        assert!(!confidence.passed);
    }

    /// Content shorter than the default minimum fails the length check.
    #[test]
    fn test_rejects_short_content() {
        let validator = TileValidator::with_defaults();
        let outcome = validator.validate(&make_tile("too", 0.8));

        let length = outcome.get_check("content_length").unwrap();
        assert!(!length.passed);
    }

    /// A near-duplicate of a registered tile fails the similarity check.
    #[test]
    fn test_detects_similar_existing() {
        let mut validator = TileValidator::with_defaults();
        validator.register_existing(
            "existing",
            "Rust is a systems programming language focused on safety speed and performance with zero cost abstractions",
        );
        let candidate = make_tile(
            "Rust is a systems programming language focused on safety speed and performance with zero cost abstractions and concurrency",
            0.8,
        );

        let outcome = validator.validate(&candidate);

        let similarity = outcome.get_check("similarity").unwrap();
        assert!(!similarity.passed, "similarity should be above threshold");
    }

    /// Tiles without any usage history are exempt from the usage-quality rule.
    #[test]
    fn test_new_tile_passes_usage_quality() {
        let validator = TileValidator::with_defaults();
        let mut candidate = make_tile("Valid content for a brand new tile.", 0.8);
        candidate.usage_count = 0;
        candidate.success_rate = 0.0;

        let outcome = validator.validate(&candidate);

        let usage = outcome.get_check("usage_quality").unwrap();
        assert!(usage.passed);
    }

    /// Every rule contributes exactly one named check to the result.
    #[test]
    fn test_all_checks_run() {
        let validator = TileValidator::with_defaults();
        let outcome = validator.validate(&make_tile("Content for testing all checks.", 0.5));

        assert_eq!(outcome.checks.len(), 6);
        assert!(outcome.get_check("confidence").is_some());
        assert!(outcome.get_check("content_length").is_some());
        assert!(outcome.get_check("freshness").is_some());
        assert!(outcome.get_check("usage_quality").is_some());
        assert!(outcome.get_check("domain_format").is_some());
        assert!(outcome.get_check("similarity").is_some());
    }

    /// Stricter thresholds turn a borderline tile into a rejection.
    #[test]
    fn test_custom_config() {
        let strict = ValidationConfig {
            min_confidence: 0.9,
            min_content_length: 50,
            acceptance_threshold: 0.95,
            ..ValidationConfig::default()
        };
        let validator = TileValidator::new(strict);

        let outcome = validator.validate(&make_tile("Short.", 0.8));

        assert!(!outcome.accepted);
    }

    /// An empty domain fails the domain-format check.
    #[test]
    fn test_empty_domain_fails() {
        let validator = TileValidator::with_defaults();
        let mut candidate = make_tile("Valid content with enough length.", 0.8);
        candidate.domain = String::new();

        let outcome = validator.validate(&candidate);

        let domain = outcome.get_check("domain_format").unwrap();
        assert!(!domain.passed);
    }

    /// Pass and fail counts partition the list of checks.
    #[test]
    fn test_pass_fail_counts() {
        let validator = TileValidator::with_defaults();
        // Confidence 0.1 is below the default minimum, so at least one check fails.
        let candidate = make_tile("Valid content.", 0.1);

        let outcome = validator.validate(&candidate);

        assert_eq!(outcome.pass_count() + outcome.fail_count(), outcome.checks.len());
        assert!(outcome.fail_count() >= 1);
    }

    /// A tile older than the freshness window fails with a reduced score.
    #[test]
    fn test_freshness_decay() {
        let short_window = ValidationConfig {
            freshness_window_secs: 100,
            ..ValidationConfig::default()
        };
        let validator = TileValidator::new(short_window);
        let mut candidate = make_tile("Valid content for freshness test.", 0.8);
        // 500 exceeds the 100-second window configured above.
        candidate.refreshed_at = 500;

        let outcome = validator.validate(&candidate);

        let freshness = outcome.get_check("freshness").unwrap();
        assert!(!freshness.passed);
        assert!(freshness.score < 1.0);
    }

    /// With the default `min_success_rate` of 0.0, even a low success rate passes.
    #[test]
    fn test_stale_tile_low_usage_rate() {
        let validator = TileValidator::with_defaults();
        let mut candidate = make_tile("Valid content here.", 0.8);
        candidate.usage_count = 100;
        candidate.success_rate = 0.2;

        let outcome = validator.validate(&candidate);

        let usage = outcome.get_check("usage_quality").unwrap();
        assert!(usage.passed);
    }
}