Skip to main content

plato_tile_scorer/
lib.rs

//! plato-tile-scorer v2 — multi-signal tile scoring with a controversy signal.
//!
//! From DeepSeek's insight: "Evolution needs predators".
//! Tiles that survive counterpoints are MORE reliable than unchallenged tiles.
4
/// Per-signal weights used to combine individual signals into one total score.
///
/// Each field is the multiplier applied to the signal of the same name; the
/// scorer sums the weighted signals. The weights are not validated or
/// normalized here, so callers supplying custom weights choose their own scale.
#[derive(Debug, Clone, PartialEq)]
pub struct ScoringWeights {
    /// Weight of the temporal (freshness) signal.
    pub temporal: f64,
    /// Weight of the ghost (resurrection priority) signal.
    pub ghost: f64,
    /// Weight of the belief (consensus) signal.
    pub belief: f64,
    /// Weight of the domain-relevance signal.
    pub domain: f64,
    /// Weight of the access-frequency signal.
    pub frequency: f64,
    /// Weight of the keyword-match signal.
    pub keyword: f64,
    /// Weight of the controversy (counterpoint survival) signal.
    pub controversy: f64,
    /// Weight of the usage-quality (success rate weighted by usage) signal.
    pub usage_quality: f64,
}

impl Default for ScoringWeights {
    /// Returns the stock weighting.
    ///
    /// The eight default weights sum to 1.0, with keyword match (0.25) and
    /// belief (0.20) carrying the most influence and the two v2 signals
    /// (controversy, usage quality) contributing 0.05 each.
    fn default() -> Self {
        Self {
            temporal: 0.10,
            ghost: 0.10,
            belief: 0.20,
            domain: 0.15,
            frequency: 0.10,
            keyword: 0.25,
            controversy: 0.05,
            usage_quality: 0.05,
        }
    }
}
32
/// Everything the scorer needs to know about one tile for one query.
#[derive(Debug, Clone)]
pub struct ScoringInput {
    /// The query the tile is being scored against.
    pub query: String,
    /// The tile's content text.
    pub tile_content: String,
    /// The tile's question text.
    pub tile_question: String,

    // ---- v1 signals ----
    /// Freshness, 0.0-1.0.
    pub temporal_score: f64,
    /// Resurrection priority, 0.0-1.0.
    pub ghost_score: f64,
    /// DCS consensus, 0.0-1.0.
    pub belief_score: f64,
    /// Domain match, 0.0-1.0.
    pub domain_relevance: f64,
    /// Normalized access frequency, 0.0-1.0.
    pub access_frequency: f64,
    /// Keyword match, 0.0-1.0.
    pub keyword_match: f64,

    // ---- v2 signals (from JC1's research) ----
    /// Counterpoints survived, 0.0-1.0.
    pub controversy_score: f64,
    /// Raw usage count.
    pub usage_count: u64,
    /// Success rate, 0.0-1.0.
    pub success_rate: f64,
    /// Confidence, 0.0-1.0.
    pub confidence: f64,
    /// Whether the tile has counterpoints.
    pub has_counterpoints: bool,
    /// Whether the tile has at least 1 counterpoint.
    pub is_challenged: bool,
    /// Tile age in seconds (going by the field name).
    pub tile_age_seconds: u64,
}

impl ScoringInput {
    /// Builds an input from just a query and tile content, filling every
    /// other field with a fixed starting value.
    ///
    /// The result describes a brand-new, never-used, unchallenged tile:
    /// full freshness and success rate, mid-range (0.5) belief, domain
    /// relevance, keyword match and confidence, and zero for everything else.
    pub fn minimal(query: &str, content: &str) -> Self {
        let query = String::from(query);
        let tile_content = String::from(content);

        Self {
            // Text fields.
            query,
            tile_content,
            tile_question: String::new(),

            // Signals that start at their maximum.
            temporal_score: 1.0,
            success_rate: 1.0,

            // Signals that start at the midpoint.
            belief_score: 0.5,
            domain_relevance: 0.5,
            keyword_match: 0.5,
            confidence: 0.5,

            // Signals that start empty.
            ghost_score: 0.0,
            access_frequency: 0.0,
            controversy_score: 0.0,
            usage_count: 0,
            tile_age_seconds: 0,
            has_counterpoints: false,
            is_challenged: false,
        }
    }
}
80
81/// Score result
82#[derive(Debug, Clone)]
83pub struct ScoreResult {
84    pub total: f64,
85    pub signals: SignalBreakdown,
86    pub gated: bool,
87    pub gate_reason: Option<String>,
88}
89
/// Per-signal values reported alongside a total score.
///
/// One field per scoring signal, named to match the corresponding weight.
#[derive(Debug, Clone)]
pub struct SignalBreakdown {
    /// Temporal (freshness) signal.
    pub temporal: f64,
    /// Ghost (resurrection priority) signal.
    pub ghost: f64,
    /// Belief (consensus) signal.
    pub belief: f64,
    /// Domain-relevance signal.
    pub domain: f64,
    /// Access-frequency signal.
    pub frequency: f64,
    /// Keyword-match signal.
    pub keyword: f64,
    /// Controversy (counterpoint survival) signal.
    pub controversy: f64,
    /// Usage-quality signal.
    pub usage_quality: f64,
}
101
102/// Multi-signal tile scorer
103pub struct TileScorer {
104    weights: ScoringWeights,
105    keyword_gate: f64,       // Below this → score 0.0
106    controversy_floor: f64,  // Min controversy for unchallenged tiles
107}
108
109impl TileScorer {
110    pub fn new() -> Self {
111        Self {
112            weights: ScoringWeights::default(),
113            keyword_gate: 0.01,
114            controversy_floor: 0.3,
115        }
116    }
117
118    pub fn with_weights(weights: ScoringWeights) -> Self {
119        Self {
120            weights,
121            keyword_gate: 0.01,
122            controversy_floor: 0.3,
123        }
124    }
125
126    /// Score a single tile
127    pub fn score(&self, input: &ScoringInput) -> ScoreResult {
128        // Keyword gate: if keyword match is too low, tile is irrelevant
129        if input.keyword_match < self.keyword_gate {
130            return ScoreResult {
131                total: 0.0,
132                signals: SignalBreakdown::zero(),
133                gated: true,
134                gate_reason: Some("keyword_match_below_gate".to_string()),
135            };
136        }
137
138        // Controversy signal: unchallenged tiles get a floor value
139        // DeepSeek's insight: untested reliability < tested reliability
140        let controversy = if input.has_counterpoints {
141            input.controversy_score
142        } else {
143            // Unchallenged tiles: penalize slightly (unknown reliability)
144            self.controversy_floor * input.confidence
145        };
146
147        // Usage quality: success rate weighted by usage volume
148        // A tile used 100 times at 90% success is better than used once at 100%
149        let usage_factor = (input.usage_count as f64 + 1.0).ln() / 10.0; // Normalize ln
150        let usage_quality = input.success_rate * usage_factor.min(1.0);
151
152        // Compute weighted signals
153        let temporal = self.weights.temporal * input.temporal_score;
154        let ghost = self.weights.ghost * input.ghost_score;
155        let belief = self.weights.belief * input.belief_score;
156        let domain = self.weights.domain * input.domain_relevance;
157        let frequency = self.weights.frequency * input.access_frequency;
158        let keyword = self.weights.keyword * input.keyword_match;
159        let controversy_w = self.weights.controversy * controversy;
160        let usage_w = self.weights.usage_quality * usage_quality;
161
162        let total = temporal + ghost + belief + domain + frequency + keyword + controversy_w + usage_w;
163
164        ScoreResult {
165            total,
166            signals: SignalBreakdown {
167                temporal: input.temporal_score,
168                ghost: input.ghost_score,
169                belief: input.belief_score,
170                domain: input.domain_relevance,
171                frequency: input.access_frequency,
172                keyword: input.keyword_match,
173                controversy,
174                usage_quality,
175            },
176            gated: false,
177            gate_reason: None,
178        }
179    }
180
181    /// Score and rank multiple tiles
182    pub fn rank(&self, inputs: &[ScoringInput]) -> Vec<(usize, ScoreResult)> {
183        let mut results: Vec<(usize, ScoreResult)> = inputs
184            .iter()
185            .enumerate()
186            .map(|(i, input)| (i, self.score(input)))
187            .collect();
188        results.sort_by(|a, b| b.1.total.partial_cmp(&a.1.total).unwrap());
189        results
190    }
191
192    /// Score with deadband priority boost
193    pub fn score_with_deadband(&self, input: &ScoringInput, priority: &str) -> ScoreResult {
194        let mut result = self.score(input);
195        match priority {
196            "P0" => result.total += 10.0,
197            "P1" => result.total += 1.0,
198            _ => {}
199        }
200        result
201    }
202
203    /// Top-N selection
204    pub fn top_n(&self, inputs: &[ScoringInput], n: usize) -> Vec<(usize, ScoreResult)> {
205        self.rank(inputs).into_iter().take(n).collect()
206    }
207}
208
209impl SignalBreakdown {
210    pub fn zero() -> Self {
211        Self {
212            temporal: 0.0, ghost: 0.0, belief: 0.0, domain: 0.0,
213            frequency: 0.0, keyword: 0.0, controversy: 0.0, usage_quality: 0.0,
214        }
215    }
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal input with the given keyword match and confidence.
    fn make_input(query: &str, keyword: f64, confidence: f64) -> ScoringInput {
        let mut input = ScoringInput::minimal(query, "tile content");
        input.keyword_match = keyword;
        input.confidence = confidence;
        input
    }

    #[test]
    fn test_basic_scoring() {
        let scorer = TileScorer::new();
        let input = make_input("rust programming", 0.8, 0.9);
        let result = scorer.score(&input);
        assert!(result.total > 0.0);
        assert!(!result.gated);
    }

    #[test]
    fn test_keyword_gate_blocks() {
        let scorer = TileScorer::new();
        // 0.001 is below the 0.01 gate.
        let input = make_input("quantum physics", 0.001, 0.9);
        let result = scorer.score(&input);
        assert_eq!(result.total, 0.0);
        assert!(result.gated);
        assert_eq!(result.gate_reason, Some("keyword_match_below_gate".to_string()));
    }

    #[test]
    fn test_keyword_gate_passes() {
        let scorer = TileScorer::new();
        // 0.02 is above the 0.01 gate.
        let input = make_input("rust", 0.02, 0.9);
        let result = scorer.score(&input);
        assert!(result.total > 0.0);
        assert!(!result.gated);
    }

    #[test]
    fn test_controversy_boost() {
        let scorer = TileScorer::new();
        // Challenged tile with high controversy and high usage
        let mut challenged = make_input("rust", 0.8, 0.9);
        challenged.has_counterpoints = true;
        challenged.controversy_score = 0.9;
        challenged.usage_count = 50;
        challenged.success_rate = 0.95;
        // Unchallenged tile with same base
        let mut unchallenged = make_input("rust", 0.8, 0.9);
        unchallenged.has_counterpoints = false;
        unchallenged.usage_count = 50;
        unchallenged.success_rate = 0.95;

        let r1 = scorer.score(&challenged);
        let r2 = scorer.score(&unchallenged);
        // Challenged tile should score higher (tested reliability)
        assert!(r1.total > r2.total, "challenged ({}) should beat unchallenged ({})", r1.total, r2.total);
    }

    #[test]
    fn test_usage_quality_signal() {
        let scorer = TileScorer::new();
        // Highly used, high success rate
        let mut popular = make_input("rust", 0.8, 0.9);
        popular.usage_count = 100;
        popular.success_rate = 0.95;
        // Rarely used
        let mut rare = make_input("rust", 0.8, 0.9);
        rare.usage_count = 1;
        rare.success_rate = 1.0;

        let r1 = scorer.score(&popular);
        let r2 = scorer.score(&rare);
        // Popular with high success should score higher
        assert!(r1.signals.usage_quality > r2.signals.usage_quality);
    }

    #[test]
    fn test_ranking_order() {
        let scorer = TileScorer::new();
        let inputs = vec![
            make_input("rust", 0.3, 0.5),
            make_input("rust", 0.9, 0.9),
            make_input("rust", 0.6, 0.7),
        ];
        let ranked = scorer.rank(&inputs);
        assert_eq!(ranked[0].0, 1); // Highest keyword+confidence first
    }

    #[test]
    fn test_top_n() {
        let scorer = TileScorer::new();
        let inputs = vec![
            make_input("a", 0.3, 0.5),
            make_input("b", 0.9, 0.9),
            make_input("c", 0.6, 0.7),
            make_input("d", 0.8, 0.8),
        ];
        let top2 = scorer.top_n(&inputs, 2);
        assert_eq!(top2.len(), 2);
        assert!(top2[0].1.total >= top2[1].1.total);
    }

    #[test]
    fn test_deadband_priority_boost() {
        let scorer = TileScorer::new();
        let input = make_input("rust", 0.5, 0.8);
        let normal = scorer.score(&input);
        let p0 = scorer.score_with_deadband(&input, "P0");
        let p1 = scorer.score_with_deadband(&input, "P1");
        assert!((p0.total - normal.total - 10.0).abs() < 0.01);
        assert!((p1.total - normal.total - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_signal_breakdown() {
        let scorer = TileScorer::new();
        let input = make_input("rust", 0.5, 0.8);
        let result = scorer.score(&input);
        assert!(result.signals.keyword > 0.0);
        assert!(result.signals.temporal >= 0.0);
        assert!(result.signals.controversy >= 0.0);
        assert!(result.signals.usage_quality >= 0.0);
    }

    #[test]
    fn test_custom_weights() {
        let weights = ScoringWeights {
            temporal: 0.0, ghost: 0.0, belief: 0.0, domain: 0.0,
            frequency: 0.0, keyword: 1.0, controversy: 0.0, usage_quality: 0.0,
        };
        let scorer = TileScorer::with_weights(weights);
        let input = make_input("rust", 0.7, 0.5);
        let result = scorer.score(&input);
        assert!((result.total - 0.7).abs() < 0.01);
    }

    #[test]
    fn test_controversy_floor_for_unchallenged() {
        let scorer = TileScorer::new();
        let mut input = make_input("rust", 0.8, 1.0);
        input.has_counterpoints = false;
        let result = scorer.score(&input);
        // Floor = 0.3 * 1.0 = 0.3, weighted by 0.05 = 0.015
        assert!(result.signals.controversy > 0.0);
    }

    #[test]
    fn test_controversy_signal_contributes() {
        let scorer = TileScorer::new();
        // Same base tile, but challenged with controversy
        let mut challenged = make_input("rust", 0.8, 0.9);
        challenged.has_counterpoints = true;
        challenged.controversy_score = 1.0;
        let mut unchallenged = make_input("rust", 0.8, 0.9);
        unchallenged.has_counterpoints = false;
        let r1 = scorer.score(&challenged);
        let r2 = scorer.score(&unchallenged);
        // Challenged tile should have higher controversy signal
        assert!(r1.signals.controversy > r2.signals.controversy);
    }

    #[test]
    fn test_zero_confidence_scores_low() {
        let scorer = TileScorer::new();
        let mut input = make_input("rust", 0.5, 0.0);
        input.has_counterpoints = false;
        let result = scorer.score(&input);
        // With 0 confidence the controversy floor is 0.3 * 0.0 = 0.
        // usage_quality is also 0, but because usage_count is 0 (ln(1) = 0),
        // not because of the confidence.
        // Remaining: temporal(1.0*0.1) + belief(0.5*0.2) + domain(0.5*0.15) + keyword(0.5*0.25)
        //          = 0.1 + 0.1 + 0.075 + 0.125 = 0.4
        // That's still modest compared to a high-confidence tile
        assert!(result.total < 0.5, "total should be modest, got {}", result.total);
    }

    #[test]
    fn test_all_signals_contribute() {
        let scorer = TileScorer::new();
        let mut input = ScoringInput::minimal("test", "content");
        input.temporal_score = 1.0;
        input.ghost_score = 1.0;
        input.belief_score = 1.0;
        input.domain_relevance = 1.0;
        input.access_frequency = 1.0;
        input.keyword_match = 1.0;
        input.has_counterpoints = true;
        input.controversy_score = 1.0;
        input.usage_count = 1000;
        input.success_rate = 1.0;
        input.confidence = 1.0;
        let result = scorer.score(&input);
        assert!(result.total > 0.5);
    }

    #[test]
    fn test_gated_result_has_zero_breakdown() {
        let scorer = TileScorer::new();
        let input = make_input("q", 0.001, 0.9);
        let result = scorer.score(&input);
        assert_eq!(result.signals.keyword, 0.0);
        assert_eq!(result.signals.temporal, 0.0);
    }

    #[test]
    fn test_empty_query() {
        let scorer = TileScorer::new();
        let input = make_input("", 0.5, 0.8);
        let result = scorer.score(&input);
        assert!(result.total >= 0.0);
    }
}
429}