1use std::collections::HashMap;
14
/// A question/answer knowledge record: the unit that this file validates, scores,
/// de-duplicates and versions.
#[derive(Debug, Clone)]
pub struct Tile {
    /// Identifier; `TileStore` files every inserted version of a tile under this key.
    pub id: String,
    /// Question text. Length-checked by `ValidationGate`, split on whitespace by
    /// `TileScorer`, and compared by `TileDedup`.
    pub question: String,
    /// Answer text. Length-checked by `ValidationGate` and included in keyword scoring.
    pub answer: String,
    /// Domain label; `ValidationGate::domain_format` only requires it to be non-empty.
    pub domain: String,
    /// Belief in the answer. The validation gate requires at least 0.3 and the scorer
    /// uses the value directly as its "belief" signal.
    pub confidence: f64,
    /// Polarity label attached to the tile.
    pub polarity: Polarity,
    /// Free-form labels; none of the logic shown alongside this struct reads them.
    pub tags: Vec<String>,
    /// Creation timestamp, compared against `now` by `ValidationGate::freshness`.
    /// Presumably Unix seconds (the test helper fills it from `SystemTime`) — confirm.
    pub created_at: u64,
    /// Last-refresh timestamp; drives the decay in `TileScorer::temporal_signal`.
    pub refreshed_at: u64,
    /// Usage counter; the scorer's frequency signal saturates once it reaches 10.
    pub use_count: u64,
    /// NOTE(review): presumably a fraction in [0, 1]; not read by the logic shown here.
    pub success_rate: f64,
    /// Number of challenges recorded against the tile; feeds the controversy signal.
    pub challenge_count: u64,
    /// Origin metadata for the tile.
    pub provenance: Provenance,
    /// Ids of tiles this tile depends on; matched by `TileCascade::dependents_of`.
    pub dependencies: Vec<String>,
}
34
/// Polarity label carried by a tile.
///
/// The default is [`Polarity::Neutral`]; it is derived via `#[default]` rather than
/// hand-written so the default variant is visible at the declaration site.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Polarity {
    Positive,
    Negative,
    #[default]
    Neutral,
}
38
/// Origin metadata attached to a tile.
#[derive(Debug, Clone)]
pub struct Provenance {
    /// Who created the tile; `"unknown"` by default.
    pub created_by: String,
    /// How the tile was validated; `"none"` by default.
    pub validation_method: String,
    /// Room the tile originated from; empty by default.
    pub source_room: String,
}

impl Default for Provenance {
    /// Placeholder provenance for tiles whose origin was not recorded.
    fn default() -> Self {
        Self {
            created_by: String::from("unknown"),
            validation_method: String::from("none"),
            source_room: String::new(),
        }
    }
}
48
49pub struct ValidationGate;
52
53impl ValidationGate {
54 pub fn confidence(tile: &Tile) -> bool { tile.confidence >= 0.3 }
55 pub fn content_length(tile: &Tile) -> bool {
56 tile.question.len() >= 10 && tile.answer.len() >= 10
57 }
58 pub fn domain_format(tile: &Tile) -> bool { !tile.domain.is_empty() }
59 pub fn freshness(tile: &Tile, now: u64) -> bool {
60 now.saturating_sub(tile.created_at) < 30 * 24 * 3600 }
62 pub fn validate_all(tile: &Tile, now: u64) -> (bool, Vec<&'static str>) {
63 let mut failures = Vec::new();
64 if !Self::confidence(tile) { failures.push("confidence"); }
65 if !Self::content_length(tile) { failures.push("content_length"); }
66 if !Self::domain_format(tile) { failures.push("domain_format"); }
67 if !Self::freshness(tile, now) { failures.push("freshness"); }
68 (failures.is_empty(), failures)
69 }
70}
71
72pub struct TileScorer;
75
76impl TileScorer {
77 pub fn score(tile: &Tile, query: &str, now: u64) -> f64 {
78 let keyword = Self::keyword_match(tile, query);
79 if keyword < 0.01 { return 0.0; }
80
81 let temporal = Self::temporal_signal(tile, now);
82 let frequency = Self::frequency_signal(tile);
83 let belief = tile.confidence;
84 let domain = 0.8; let controversy = Self::controversy_signal(tile);
86
87 keyword * 0.30 + belief * 0.25 + domain * 0.20 +
88 temporal * 0.15 + frequency * 0.10 + controversy * 0.10
89 }
90
91 fn keyword_match(tile: &Tile, query: &str) -> f64 {
92 let q_words: std::collections::HashSet<&str> = query.split_whitespace().collect();
93 let t_words: std::collections::HashSet<&str> =
94 tile.question.split_whitespace().chain(tile.answer.split_whitespace()).collect();
95 if q_words.is_empty() || t_words.is_empty() { return 0.0; }
96 let intersection = q_words.intersection(&t_words).count();
97 intersection as f64 / q_words.len().max(t_words.len()) as f64
98 }
99
100 fn temporal_signal(tile: &Tile, now: u64) -> f64 {
101 let age_hours = now.saturating_sub(tile.refreshed_at) as f64 / 3600.0;
102 (0.5_f64).powf(age_hours / 168.0) }
104
105 fn frequency_signal(tile: &Tile) -> f64 {
106 (tile.use_count as f64).min(10.0) / 10.0
107 }
108
109 fn controversy_signal(tile: &Tile) -> f64 {
110 if tile.challenge_count == 0 { return 0.0; }
111 (tile.challenge_count as f64 * 0.05).min(0.3)
113 }
114}
115
/// One stored version of a tile, as recorded by `TileStore::insert`.
#[derive(Debug, Clone)]
pub struct StoredTile {
    /// The tile content captured for this version.
    pub tile: Tile,
    /// Version number within the tile's history; the first insert for an id gets 1.
    pub version: u32,
    /// The preceding version number (`version - 1`), or `None` for version 1.
    pub parent_version: Option<u32>,
    /// The `now` value that was passed to `TileStore::insert` for this version.
    pub stored_at: u64,
}
125
126pub struct TileStore {
127 tiles: HashMap<String, Vec<StoredTile>>,
128}
129
130impl TileStore {
131 pub fn new() -> Self { TileStore { tiles: HashMap::new() } }
132
133 pub fn insert(&mut self, tile: Tile, now: u64) -> u32 {
134 let version = self.next_version(&tile.id);
135 let stored = StoredTile { tile, version, parent_version: if version > 1 { Some(version - 1) } else { None }, stored_at: now };
136 self.tiles.entry(stored.tile.id.clone()).or_default().push(stored);
137 version
138 }
139
140 fn next_version(&self, id: &str) -> u32 {
141 self.tiles.get(id).map(|v| v.last().map(|s| s.version + 1).unwrap_or(1)).unwrap_or(1)
142 }
143
144 pub fn get_latest(&self, id: &str) -> Option<&StoredTile> {
145 self.tiles.get(id).and_then(|v| v.last())
146 }
147
148 pub fn get_version(&self, id: &str, version: u32) -> Option<&StoredTile> {
149 self.tiles.get(id).and_then(|v| v.iter().find(|s| s.version == version))
150 }
151
152 pub fn version_count(&self, id: &str) -> usize {
153 self.tiles.get(id).map(|v| v.len()).unwrap_or(0)
154 }
155
156 pub fn all_latest(&self) -> Vec<&StoredTile> {
157 self.tiles.values().filter_map(|v| v.last()).collect()
158 }
159}
160
161pub struct TileDedup;
164
165impl TileDedup {
166 pub fn is_duplicate(a: &Tile, b: &Tile) -> bool {
167 if a.question == b.question && a.answer == b.answer { return true; }
169 let jaccard = Self::jaccard(&a.question, &b.question);
171 if jaccard > 0.9 { return true; }
172 false
173 }
174
175 fn jaccard(a: &str, b: &str) -> f64 {
176 let sa: std::collections::HashSet<&str> = a.split_whitespace().collect();
177 let sb: std::collections::HashSet<&str> = b.split_whitespace().collect();
178 if sa.is_empty() && sb.is_empty() { return 1.0; }
179 if sa.is_empty() || sb.is_empty() { return 0.0; }
180 let intersection = sa.intersection(&sb).count();
181 let union = sa.union(&sb).count();
182 intersection as f64 / union as f64
183 }
184
185 pub fn dedup_batch(tiles: &[Tile]) -> Vec<&Tile> {
186 let mut kept: Vec<&Tile> = Vec::new();
187 for tile in tiles {
188 if !kept.iter().any(|k| Self::is_duplicate(k, tile)) {
189 kept.push(tile);
190 }
191 }
192 kept
193 }
194}
195
196pub struct TileVersion;
199
200impl TileVersion {
201 pub fn diff(a: &Tile, b: &Tile) -> Vec<String> {
202 let mut changes = Vec::new();
203 if a.question != b.question { changes.push("question_changed".into()); }
204 if a.answer != b.answer { changes.push("answer_changed".into()); }
205 if a.domain != b.domain { changes.push("domain_changed".into()); }
206 if (a.confidence - b.confidence).abs() > 0.01 { changes.push("confidence_changed".into()); }
207 changes
208 }
209}
210
211pub struct TileCascade;
214
215impl TileCascade {
216 pub fn dependents_of<'a>(tile_id: &str, all_tiles: &'a [Tile]) -> Vec<&'a Tile> {
217 all_tiles.iter().filter(|t| t.dependencies.contains(&tile_id.to_string())).collect()
218 }
219}
220
221pub struct Pipeline {
224 store: TileStore,
225 tiles: Vec<Tile>,
226}
227
228impl Pipeline {
229 pub fn new() -> Self { Pipeline { store: TileStore::new(), tiles: Vec::new() } }
230
231 pub fn process(&mut self, tile: Tile, query: &str, now: u64) -> PipelineResult {
233 let (valid, failures) = ValidationGate::validate_all(&tile, now);
235 if !valid {
236 return PipelineResult::Rejected { failures };
237 }
238
239 let score = TileScorer::score(&tile, query, now);
241
242 let is_dup = self.tiles.iter().any(|existing| TileDedup::is_duplicate(existing, &tile));
244
245 let version = self.store.insert(tile.clone(), now);
247 self.tiles.push(tile.clone());
248
249 PipelineResult::Accepted {
250 tile_id: tile.id.clone(),
251 score,
252 version,
253 is_duplicate: is_dup,
254 }
255 }
256
257 pub fn search(&self, query: &str, now: u64) -> Vec<(f64, &Tile)> {
259 self.store.all_latest().iter().map(|stored| {
260 (TileScorer::score(&stored.tile, query, now), &stored.tile)
261 }).filter(|(s, _)| *s > 0.0).collect()
262 }
263
264 pub fn get(&self, id: &str) -> Option<&StoredTile> {
266 self.store.get_latest(id)
267 }
268
269 pub fn history(&self, id: &str) -> usize {
271 self.store.version_count(id)
272 }
273}
274
/// Outcome of `Pipeline::process` for a single tile.
#[derive(Debug, Clone, PartialEq)]
pub enum PipelineResult {
    /// The tile passed validation and was stored.
    Accepted {
        /// Id of the stored tile.
        tile_id: String,
        /// Relevance score against the query supplied to `process`.
        score: f64,
        /// Version number assigned by the store.
        version: u32,
        /// Whether a previously accepted tile was judged a duplicate of this one.
        is_duplicate: bool,
    },
    /// The tile failed validation and was not stored.
    Rejected {
        /// Names of the validation checks that failed.
        failures: Vec<&'static str>,
    },
}
279
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to 0 if the system clock reports a time before the epoch.
fn now() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
283
284fn make_tile(id: &str, q: &str, a: &str, domain: &str, conf: f64) -> Tile {
285 let n = now();
286 Tile { id: id.into(), question: q.into(), answer: a.into(), domain: domain.into(),
287 confidence: conf, polarity: Polarity::Neutral, tags: vec![],
288 created_at: n, refreshed_at: n, use_count: 0, success_rate: 1.0,
289 challenge_count: 0, provenance: Provenance::default(), dependencies: vec![] }
290}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed tile that shares a token with the query is accepted as version 1.
    #[test]
    fn test_full_pipeline_accept_valid_tile() {
        let mut pipeline = Pipeline::new();
        let tile = make_tile(
            "t1",
            "What is constraint theory?",
            "Geometric snapping for deterministic computation.",
            "constraint_theory",
            0.9,
        );

        match pipeline.process(tile, "constraint theory", now()) {
            PipelineResult::Rejected { .. } => panic!("Valid tile should be accepted"),
            PipelineResult::Accepted { score, version, .. } => {
                assert!(score > 0.0);
                assert_eq!(version, 1);
            }
        }
    }

    /// Confidence below the gate threshold is reported as a `confidence` failure.
    #[test]
    fn test_full_pipeline_reject_low_confidence() {
        let mut pipeline = Pipeline::new();
        let tile = make_tile("t2", "Short", "Short", "test", 0.1);

        match pipeline.process(tile, "test", now()) {
            PipelineResult::Accepted { .. } => panic!("Low confidence tile should be rejected"),
            PipelineResult::Rejected { failures } => {
                assert!(failures.contains(&"confidence"));
            }
        }
    }

    /// Question and answer text that is too short is reported as `content_length`.
    #[test]
    fn test_full_pipeline_reject_short_content() {
        let mut pipeline = Pipeline::new();
        let tile = make_tile("t3", "Q?", "A.", "test", 0.9);

        match pipeline.process(tile, "test", now()) {
            PipelineResult::Accepted { .. } => panic!("Short content should be rejected"),
            PipelineResult::Rejected { failures } => {
                assert!(failures.contains(&"content_length"));
            }
        }
    }

    /// Re-submitting an id appends a new version and makes it the latest one.
    #[test]
    fn test_full_pipeline_versioning() {
        let mut pipeline = Pipeline::new();

        let original = make_tile(
            "v1",
            "What is PLATO?",
            "Training pipeline for agents.",
            "plato",
            0.9,
        );
        pipeline.process(original, "plato", now());
        assert_eq!(pipeline.history("v1"), 1);

        let revised = make_tile(
            "v1",
            "What is PLATO?",
            "Training pipeline with tiles, rooms, and ensigns.",
            "plato",
            0.95,
        );
        pipeline.process(revised, "plato", now());
        assert_eq!(pipeline.history("v1"), 2);

        let latest = pipeline.get("v1").unwrap();
        assert_eq!(latest.version, 2);
        assert!(latest.tile.answer.contains("ensigns"));
    }

    /// A tile with the same text under a different id is accepted but flagged.
    #[test]
    fn test_full_pipeline_dedup_detection() {
        let mut pipeline = Pipeline::new();

        let first = make_tile(
            "d1",
            "What is deadband?",
            "P0/P1/P2 priority protocol.",
            "deadband",
            0.9,
        );
        pipeline.process(first, "deadband", now());

        let second = make_tile(
            "d2",
            "What is deadband?",
            "P0/P1/P2 priority protocol.",
            "deadband",
            0.9,
        );
        match pipeline.process(second, "deadband", now()) {
            PipelineResult::Rejected { .. } => panic!("Should accept but flag as dup"),
            PipelineResult::Accepted { is_duplicate, .. } => {
                assert!(is_duplicate);
            }
        }
    }

    /// Of the three stored tiles only "s3" shares an exact whitespace token ("flux")
    /// with the search query, so it must be the first hit.
    #[test]
    fn test_full_pipeline_search_ranks_correctly() {
        let mut pipeline = Pipeline::new();
        pipeline.process(
            make_tile("s1", "What is flux?", "Bytecode runtime.", "flux", 0.8),
            "flux",
            now(),
        );
        pipeline.process(
            make_tile("s2", "What is fishing?", "Catching fish.", "fishing", 0.9),
            "flux",
            now(),
        );
        pipeline.process(
            make_tile(
                "s3",
                "What is flux runtime?",
                "Deterministic bytecode VM.",
                "flux",
                0.9,
            ),
            "flux runtime",
            now(),
        );

        let hits = pipeline.search("flux runtime", now());
        assert!(!hits.is_empty());
        assert_eq!(hits[0].1.id, "s3");
    }

    /// A challenged tile that matches the query still surfaces with a real score.
    #[test]
    fn test_full_pipeline_controversy_boost() {
        let mut pipeline = Pipeline::new();
        let mut challenged = make_tile(
            "c1",
            "What is LoRA?",
            "Low-rank adaptation for fine-tuning.",
            "training",
            0.9,
        );
        challenged.challenge_count = 5;
        pipeline.process(challenged, "lora training adaptation", now());

        let hits = pipeline.search("lora training adaptation", now());
        assert!(!hits.is_empty());
        assert!(hits[0].0 > 0.1);
    }

    /// `dependents_of` finds the accepted tile that lists "dep1" as a dependency.
    #[test]
    fn test_full_pipeline_dependency_cascade() {
        let mut pipeline = Pipeline::new();

        let mut upstream = make_tile("dep1", "What is a tile?", "Atomic knowledge unit.", "plato", 0.9);
        upstream.dependencies = vec!["dep0".into()];
        pipeline.process(upstream, "tile", now());

        let mut downstream = make_tile("dep2", "What is a room?", "Tile collection.", "plato", 0.9);
        downstream.dependencies = vec!["dep1".into()];
        pipeline.process(downstream, "room", now());

        let dependents = TileCascade::dependents_of("dep1", &pipeline.tiles);
        assert_eq!(dependents.len(), 1);
        assert_eq!(dependents[0].id, "dep2");
    }

    /// Ten valid tiles are all accepted, and a broad query finds at least two of them.
    #[test]
    fn test_full_pipeline_ten_tiles() {
        let mut pipeline = Pipeline::new();
        let specs = [
            ("batch1", "What is constraint theory?", "Geometric snapping.", "constraint_theory", 0.9),
            ("batch2", "What is deadband protocol?", "P0 rocks, P1 channels, P2 optimize.", "deadband", 0.85),
            ("batch3", "What is a holodeck?", "MUD environment for agents.", "holodeck", 0.8),
            ("batch4", "What is flux?", "Deterministic bytecode runtime.", "flux", 0.9),
            ("batch5", "What is an ensign?", "Compressed expertise adapter.", "plato", 0.85),
            ("batch6", "What is a tile?", "Atomic Q/A knowledge unit.", "plato", 0.95),
            ("batch7", "What is a room?", "Thematic tile collection.", "plato", 0.9),
            ("batch8", "What is the forge?", "GPU training pipeline.", "training", 0.8),
            ("batch9", "What is lab guard?", "Hypothesis gating.", "training", 0.85),
            ("batch10", "What is ghost tile?", "Decayed inactive knowledge.", "plato", 0.8),
        ];

        let mut accepted = 0;
        for &(id, question, answer, domain, confidence) in specs.iter() {
            let tile = make_tile(id, question, answer, domain, confidence);
            match pipeline.process(tile, "plato training", now()) {
                PipelineResult::Accepted { .. } => accepted += 1,
                PipelineResult::Rejected { .. } => {}
            }
        }
        assert_eq!(accepted, 10);

        let hits = pipeline.search("plato tile room ensign training knowledge", now());
        assert!(hits.len() >= 2);
    }

    /// The confidence gate accepts 0.9 and rejects 0.1.
    #[test]
    fn test_validation_gate_confidence() {
        let confident = make_tile("g", "What is PLATO?", "Training pipeline.", "plato", 0.9);
        let doubtful = make_tile("b", "What?", "Low conf.", "plato", 0.1);

        assert!(ValidationGate::confidence(&confident));
        assert!(!ValidationGate::confidence(&doubtful));
    }

    /// A query with no shared token scores (near) zero; a matching query does not.
    #[test]
    fn test_scorer_keyword_gating() {
        let tile = make_tile(
            "kg",
            "What is quantum computing?",
            "Qubits and superposition.",
            "physics",
            0.9,
        );

        let on_topic = TileScorer::score(&tile, "quantum computing qubits", now());
        let off_topic = TileScorer::score(&tile, "fishing boats anchors", now());

        assert!(on_topic > 0.1);
        assert!(off_topic < 0.01);
    }

    /// Identical tiles are duplicates; batch de-duplication keeps one of them plus
    /// the tile with a different question.
    #[test]
    fn test_dedup_exact_and_fuzzy() {
        let first = make_tile("da", "What is flux?", "Bytecode runtime for agents.", "flux", 0.9);
        let twin = make_tile("db", "What is flux?", "Bytecode runtime for agents.", "flux", 0.9);
        let distinct = make_tile(
            "dc",
            "What is flux runtime?",
            "Bytecode VM for deterministic execution.",
            "flux",
            0.9,
        );

        assert!(TileDedup::is_duplicate(&first, &twin));

        let batch = vec![first, twin, distinct];
        assert_eq!(TileDedup::dedup_batch(&batch).len(), 2);
    }

    /// `diff` reports an answer change and a domain change under their own labels.
    #[test]
    fn test_version_diff() {
        let first = make_tile("va", "Q?", "A1", "d", 0.9);
        let second = make_tile("vb", "Q?", "A2", "d", 0.9);
        let third = make_tile("vc", "Q?", "A2", "d2", 0.9);

        let first_to_second = TileVersion::diff(&first, &second);
        let second_to_third = TileVersion::diff(&second, &third);

        assert!(first_to_second.iter().any(|change| change == "answer_changed"));
        assert!(second_to_third.iter().any(|change| change == "domain_changed"));
    }
}