// plato_tile_validate/lib.rs

//! # plato-tile-validate
//! Tile quality validation pipeline.
//!
//! Before a tile enters the fleet knowledge base, it passes through
//! validation gates: a confidence check, a content-length check, a
//! freshness check, a usage-quality check, a domain-format check, and a
//! similarity (consistency) check against existing tiles.
//!
//! From JC1's Living Knowledge: tiles that survive challenge get boosted.
//! This crate is the gatekeeper.

use std::collections::{HashMap, HashSet};

/// Outcome of a single validation gate.
///
/// Each gate run by `TileValidator::validate` reports its result in this
/// shape; the results are collected into `ValidationResult::checks`.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationCheck {
    /// Name of the gate (for example `"confidence"`); this is the key that
    /// `ValidationResult::get_check` matches against.
    pub name: String,
    /// Whether the tile satisfied this gate.
    pub passed: bool,
    /// Numeric score of this gate; `TileValidator::validate` averages the
    /// scores of all gates into the overall score.
    pub score: f64,
    /// Human-readable explanation of the outcome.
    pub reason: String,
}

22/// Aggregate validation result.
23#[derive(Debug, Clone)]
24pub struct ValidationResult {
25    pub checks: Vec<ValidationCheck>,
26    pub overall_score: f64,
27    pub accepted: bool,
28}
29
30impl ValidationResult {
31    pub fn pass_count(&self) -> usize { self.checks.iter().filter(|c| c.passed).count() }
32    pub fn fail_count(&self) -> usize { self.checks.iter().filter(|c| !c.passed).count() }
33
34    pub fn get_check(&self, name: &str) -> Option<&ValidationCheck> {
35        self.checks.iter().find(|c| c.name == name)
36    }
37}
38
/// A tile submitted for validation.
#[derive(Debug, Clone)]
pub struct TileInput {
    /// Tile identifier. Not read by any check in this file.
    pub id: String,
    /// Tile body. Its byte length is bounds-checked and its lowercased words
    /// are compared against registered tiles.
    pub content: String,
    /// Confidence value compared against `ValidationConfig::min_confidence`.
    pub confidence: f64,
    /// Domain label. To pass the domain check it must be non-empty after
    /// trimming and at most 100 bytes long.
    pub domain: String,
    /// Creation timestamp. Not read by any check in this file; presumably in
    /// seconds like `refreshed_at` — TODO confirm.
    pub created_at: u64,
    /// Last-refresh value. `TileValidator::validate` compares it directly
    /// against `ValidationConfig::freshness_window_secs`.
    pub refreshed_at: u64,
    /// Number of recorded uses. A value of 0 marks a new tile, which the
    /// usage-quality check always passes.
    pub usage_count: u64,
    /// Success rate compared against `ValidationConfig::min_success_rate`;
    /// presumably a fraction in `0.0..=1.0` — TODO confirm.
    pub success_rate: f64,
    /// Free-form tags. Not read by any check in this file.
    pub tags: Vec<String>,
}

/// Thresholds that drive the validation pipeline.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Lowest `TileInput::confidence` that passes the confidence check.
    pub min_confidence: f64,
    /// Shortest content, in bytes, that passes the content-length check.
    pub min_content_length: usize,
    /// Longest content, in bytes, that passes the content-length check.
    pub max_content_length: usize,
    /// Largest tile age, in seconds, that still passes the freshness check.
    pub freshness_window_secs: u64,
    /// Lowest success rate that passes the usage-quality check; only applied
    /// to tiles with a non-zero usage count.
    pub min_success_rate: f64,
    /// Lowest usage count that passes the usage-quality check; only applied
    /// to tiles with a non-zero usage count.
    pub min_usage_count: u64,
    /// Word-level Jaccard similarity at or above which a tile counts as too
    /// similar to an already registered tile.
    pub similarity_threshold: f64,
    /// Lowest overall score at which a tile is accepted.
    pub acceptance_threshold: f64,
}

66impl Default for ValidationConfig {
67    fn default() -> Self {
68        Self {
69            min_confidence: 0.3,
70            min_content_length: 10,
71            max_content_length: 100_000,
72            freshness_window_secs: 7 * 24 * 3600, // 7 days
73            min_success_rate: 0.0, // no usage needed for new tiles
74            min_usage_count: 0,
75            similarity_threshold: 0.9,
76            acceptance_threshold: 0.6,
77        }
78    }
79}
80
81/// Tile quality validator.
82pub struct TileValidator {
83    config: ValidationConfig,
84    existing_contents: Vec<(String, String)>, // (id, content)
85}
86
87impl TileValidator {
88    pub fn new(config: ValidationConfig) -> Self {
89        Self { config, existing_contents: Vec::new() }
90    }
91
92    pub fn with_defaults() -> Self {
93        Self::new(ValidationConfig::default())
94    }
95
96    /// Register an existing tile for similarity checking.
97    pub fn register_existing(&mut self, id: &str, content: &str) {
98        self.existing_contents.push((id.to_string(), content.to_string()));
99    }
100
101    /// Run all validation checks on a tile.
102    pub fn validate(&self, tile: &TileInput) -> ValidationResult {
103        let mut checks = Vec::new();
104
105        checks.push(self.check_confidence(tile));
106        checks.push(self.check_content_length(tile));
107        checks.push(self.check_freshness(tile));
108        checks.push(self.check_usage_quality(tile));
109        checks.push(self.check_domain_format(tile));
110        checks.push(self.check_similar_existing(tile));
111
112        let overall_score: f64 = if checks.is_empty() {
113            0.0
114        } else {
115            checks.iter().map(|c| c.score).sum::<f64>() / checks.len() as f64
116        };
117        let accepted = overall_score >= self.config.acceptance_threshold;
118
119        ValidationResult { checks, overall_score, accepted }
120    }
121
122    /// Check confidence meets minimum.
123    fn check_confidence(&self, tile: &TileInput) -> ValidationCheck {
124        let passed = tile.confidence >= self.config.min_confidence;
125        let score = if passed { 1.0 } else { tile.confidence / self.config.min_confidence };
126        ValidationCheck {
127            name: "confidence".to_string(),
128            passed,
129            score,
130            reason: if passed {
131                format!("confidence {:.2} >= {:.2}", tile.confidence, self.config.min_confidence)
132            } else {
133                format!("confidence {:.2} < {:.2}", tile.confidence, self.config.min_confidence)
134            },
135        }
136    }
137
138    /// Check content length is within bounds.
139    fn check_content_length(&self, tile: &TileInput) -> ValidationCheck {
140        let len = tile.content.len();
141        let too_short = len < self.config.min_content_length;
142        let too_long = len > self.config.max_content_length;
143        let passed = !too_short && !too_long;
144        let score = if passed { 1.0 } else if too_short {
145            len as f64 / self.config.min_content_length as f64
146        } else {
147            0.0
148        };
149        ValidationCheck {
150            name: "content_length".to_string(),
151            passed,
152            score: score.min(1.0),
153            reason: format!("content length {} (min: {}, max: {})", len, self.config.min_content_length, self.config.max_content_length),
154        }
155    }
156
157    /// Check tile is within freshness window.
158    fn check_freshness(&self, tile: &TileInput) -> ValidationCheck {
159        // Use current time of 0 for testing; compare refreshed_at
160        let age = tile.refreshed_at;
161        let within_window = age <= self.config.freshness_window_secs;
162        let score = if within_window { 1.0 } else {
163            (self.config.freshness_window_secs as f64 / age.max(1) as f64).min(1.0)
164        };
165        ValidationCheck {
166            name: "freshness".to_string(),
167            passed: within_window,
168            score,
169            reason: format!("age {}s (window: {}s)", age, self.config.freshness_window_secs),
170        }
171    }
172
173    /// Check usage quality (success rate and usage count).
174    fn check_usage_quality(&self, tile: &TileInput) -> ValidationCheck {
175        // New tiles (usage_count=0) always pass this check
176        if tile.usage_count == 0 {
177            return ValidationCheck {
178                name: "usage_quality".to_string(),
179                passed: true,
180                score: 1.0,
181                reason: "new tile, no usage data yet".to_string(),
182            };
183        }
184        let usage_ok = tile.usage_count >= self.config.min_usage_count;
185        let rate_ok = tile.success_rate >= self.config.min_success_rate;
186        let passed = usage_ok && rate_ok;
187        let score = if passed { 1.0 } else {
188            (tile.success_rate * 0.5 + if usage_ok { 0.5 } else { 0.0 }).min(1.0)
189        };
190        ValidationCheck {
191            name: "usage_quality".to_string(),
192            passed,
193            score,
194            reason: format!("usage {} (min: {}), rate {:.2}", tile.usage_count, self.config.min_usage_count, tile.success_rate),
195        }
196    }
197
198    /// Check domain format is non-empty and reasonable.
199    fn check_domain_format(&self, tile: &TileInput) -> ValidationCheck {
200        let domain = tile.domain.trim();
201        let passed = !domain.is_empty() && domain.len() <= 100;
202        ValidationCheck {
203            name: "domain_format".to_string(),
204            passed,
205            score: if passed { 1.0 } else { 0.0 },
206            reason: format!("domain: '{}'", domain),
207        }
208    }
209
210    /// Check if tile is too similar to existing tiles.
211    fn check_similar_existing(&self, tile: &TileInput) -> ValidationCheck {
212        let mut max_sim = 0.0_f64;
213        let mut most_similar = String::new();
214        for (id, content) in &self.existing_contents {
215            let sim = jaccard_similarity(&tile.content.to_lowercase(), &content.to_lowercase());
216            if sim > max_sim {
217                max_sim = sim;
218                most_similar = id.clone();
219            }
220        }
221        let passed = max_sim < self.config.similarity_threshold;
222        ValidationCheck {
223            name: "similarity".to_string(),
224            passed,
225            score: if self.existing_contents.is_empty() { 1.0 } else { 1.0 - max_sim },
226            reason: if most_similar.is_empty() {
227                "no existing tiles to compare".to_string()
228            } else {
229                format!("max similarity {:.2} to '{}' (threshold: {:.2})", max_sim, most_similar, self.config.similarity_threshold)
230            },
231        }
232    }
233}
234
/// Jaccard similarity between two strings (word-level).
///
/// Each string is split on whitespace into a set of distinct words, and the
/// result is `|A ∩ B| / |A ∪ B|`. Two strings with no words at all are
/// treated as identical (1.0); exactly one empty side yields 0.0.
/// Comparison is case-sensitive, so callers lowercase beforehand if needed.
fn jaccard_similarity(a: &str, b: &str) -> f64 {
    let words_a: HashSet<&str> = a.split_whitespace().collect();
    let words_b: HashSet<&str> = b.split_whitespace().collect();
    match (words_a.is_empty(), words_b.is_empty()) {
        (true, true) => 1.0,
        (true, false) | (false, true) => 0.0,
        (false, false) => {
            let shared = words_a.intersection(&words_b).count();
            // |A ∪ B| = |A| + |B| - |A ∩ B|, which avoids a second traversal.
            let total = words_a.len() + words_b.len() - shared;
            shared as f64 / total as f64
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tile that passes every default gate, except where the given
    /// content or confidence says otherwise.
    fn make_tile(content: &str, confidence: f64) -> TileInput {
        TileInput {
            id: "t1".to_string(),
            content: content.to_string(),
            confidence,
            domain: "testing".to_string(),
            created_at: 1000,
            refreshed_at: 5000,
            usage_count: 10,
            success_rate: 0.9,
            tags: vec!["test".to_string()],
        }
    }

    #[test]
    fn test_accepts_good_tile() {
        let v = TileValidator::with_defaults();
        let tile = make_tile("This is a valid knowledge tile with sufficient content length.", 0.8);
        let result = v.validate(&tile);
        assert!(result.accepted);
        assert!(result.overall_score >= 0.6);
    }

    #[test]
    fn test_rejects_low_confidence() {
        let v = TileValidator::with_defaults();
        let tile = make_tile("Valid content here.", 0.1);
        let result = v.validate(&tile);
        let conf = result.get_check("confidence").unwrap();
        // Only the gate itself is asserted: one failing gate lowers the
        // average but does not necessarily reject the tile.
        assert!(!conf.passed);
        // Partial credit is confidence / min_confidence.
        assert!((conf.score - 0.1 / 0.3).abs() < 1e-9);
    }

    #[test]
    fn test_rejects_short_content() {
        let v = TileValidator::with_defaults();
        let tile = make_tile("too", 0.8);
        let result = v.validate(&tile);
        let len_check = result.get_check("content_length").unwrap();
        assert!(!len_check.passed);
        // Three bytes against a minimum of ten.
        assert!((len_check.score - 0.3).abs() < 1e-9);
    }

    #[test]
    fn test_detects_similar_existing() {
        let mut v = TileValidator::with_defaults();
        v.register_existing("existing", "Rust is a systems programming language focused on safety speed and performance with zero cost abstractions");
        // Adds a single new word, so 16 of 17 distinct words are shared.
        let tile = make_tile("Rust is a systems programming language focused on safety speed and performance with zero cost abstractions and concurrency", 0.8);
        let result = v.validate(&tile);
        let sim = result.get_check("similarity").unwrap();
        assert!(!sim.passed, "similarity should be above threshold");
    }

    #[test]
    fn test_new_tile_passes_usage_quality() {
        let v = TileValidator::with_defaults();
        let mut tile = make_tile("Valid content for a brand new tile.", 0.8);
        tile.usage_count = 0;
        tile.success_rate = 0.0;
        let result = v.validate(&tile);
        let uq = result.get_check("usage_quality").unwrap();
        assert!(uq.passed);
        assert_eq!(uq.score, 1.0);
    }

    #[test]
    fn test_all_checks_run() {
        let v = TileValidator::with_defaults();
        let tile = make_tile("Content for testing all checks.", 0.5);
        let result = v.validate(&tile);
        assert_eq!(result.checks.len(), 6);
        assert!(result.get_check("confidence").is_some());
        assert!(result.get_check("content_length").is_some());
        assert!(result.get_check("freshness").is_some());
        assert!(result.get_check("usage_quality").is_some());
        assert!(result.get_check("domain_format").is_some());
        assert!(result.get_check("similarity").is_some());
    }

    #[test]
    fn test_custom_config() {
        let config = ValidationConfig {
            min_confidence: 0.9,
            min_content_length: 50,
            acceptance_threshold: 0.95,
            ..ValidationConfig::default()
        };
        let v = TileValidator::new(config);
        let tile = make_tile("Short.", 0.8);
        let result = v.validate(&tile);
        assert!(!result.accepted);
    }

    #[test]
    fn test_empty_domain_fails() {
        let v = TileValidator::with_defaults();
        let mut tile = make_tile("Valid content with enough length.", 0.8);
        tile.domain = "".to_string();
        let result = v.validate(&tile);
        let dom = result.get_check("domain_format").unwrap();
        assert!(!dom.passed);
        assert_eq!(dom.score, 0.0);
    }

    #[test]
    fn test_pass_fail_counts() {
        let v = TileValidator::with_defaults();
        let tile = make_tile("Valid content.", 0.1); // low confidence
        let result = v.validate(&tile);
        assert_eq!(result.pass_count() + result.fail_count(), result.checks.len());
        assert!(result.fail_count() >= 1);
    }

    #[test]
    fn test_freshness_decay() {
        let config = ValidationConfig {
            freshness_window_secs: 100,
            ..ValidationConfig::default()
        };
        let v = TileValidator::new(config);
        let mut tile = make_tile("Valid content for freshness test.", 0.8);
        tile.refreshed_at = 500; // older than window
        let result = v.validate(&tile);
        let fresh = result.get_check("freshness").unwrap();
        assert!(!fresh.passed);
        assert!(fresh.score < 1.0);
        // Partial credit is window / age.
        assert!((fresh.score - 0.2).abs() < 1e-9);
    }

    #[test]
    fn test_stale_tile_low_usage_rate() {
        let v = TileValidator::with_defaults();
        let mut tile = make_tile("Valid content here.", 0.8);
        tile.usage_count = 100;
        tile.success_rate = 0.2;
        let result = v.validate(&tile);
        let uq = result.get_check("usage_quality").unwrap();
        // Should pass because min_success_rate defaults to 0.0
        assert!(uq.passed);
    }
}