use std::collections::HashMap;
/// A single question/answer knowledge record together with the metadata the
/// rest of this file validates, scores, versions and de-duplicates it by.
#[derive(Debug, Clone)]
pub struct Tile {
    /// Identifier; [`TileStore`] groups successive versions under this key.
    pub id: String,
    /// Question text. Split on whitespace for scoring and de-duplication.
    pub question: String,
    /// Answer text. Split on whitespace for scoring.
    pub answer: String,
    /// Domain label; validation only requires it to be non-empty.
    pub domain: String,
    /// Belief in the tile. Validation requires `>= 0.3`, and the value is one
    /// of the weighted terms of the relevance score.
    pub confidence: f64,
    /// Polarity marker. Not read by any logic visible in this file.
    pub polarity: Polarity,
    /// Free-form tags. Not read by any logic visible in this file.
    pub tags: Vec<String>,
    /// Creation timestamp. The freshness check compares `now - created_at`
    /// against `30 * 24 * 3600`, i.e. the unit is seconds.
    pub created_at: u64,
    /// Last-refresh timestamp (same unit as `created_at`); drives the
    /// temporal decay term of the score.
    pub refreshed_at: u64,
    /// Usage counter; feeds the frequency term of the score, capped at 10.
    pub use_count: u64,
    /// Success ratio. Not read by any logic visible in this file.
    pub success_rate: f64,
    /// Number of challenges; feeds the controversy term of the score.
    pub challenge_count: u64,
    /// Who/what produced the tile.
    pub provenance: Provenance,
    /// Ids of tiles this tile depends on (matched by
    /// [`TileCascade::dependents_of`]).
    pub dependencies: Vec<String>,
}
/// Three-valued polarity marker carried by a tile.
///
/// The code visible in this file only ever constructs [`Polarity::Neutral`];
/// the meaning of the other two variants is defined by their users.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Polarity {
    Positive,
    Negative,
    Neutral,
}

impl Default for Polarity {
    /// The default polarity is [`Polarity::Neutral`].
    fn default() -> Self {
        Self::Neutral
    }
}
/// Origin information attached to a tile. All three fields are free-form
/// strings; nothing in this file interprets them.
#[derive(Debug, Clone)]
pub struct Provenance {
    /// Who or what created the tile.
    pub created_by: String,
    /// How the tile was validated.
    pub validation_method: String,
    /// Room the tile came from.
    pub source_room: String,
}

impl Default for Provenance {
    /// Placeholder provenance: creator `"unknown"`, validation `"none"`,
    /// and an empty source room.
    fn default() -> Self {
        Self {
            created_by: String::from("unknown"),
            validation_method: String::from("none"),
            source_room: String::new(),
        }
    }
}
/// Stateless admission checks applied to a [`Tile`] before it is stored.
pub struct ValidationGate;

impl ValidationGate {
    /// Lowest confidence that still passes [`ValidationGate::confidence`].
    const MIN_CONFIDENCE: f64 = 0.3;
    /// Minimum length, in bytes, of both the question and the answer.
    const MIN_TEXT_BYTES: usize = 10;
    /// Freshness window in seconds (30 days).
    const MAX_AGE_SECS: u64 = 30 * 24 * 3600;

    /// `true` when the tile's confidence is at least `0.3`.
    pub fn confidence(tile: &Tile) -> bool {
        tile.confidence >= Self::MIN_CONFIDENCE
    }

    /// `true` when question and answer are each at least 10 bytes long.
    ///
    /// The length is `String::len`, i.e. UTF-8 bytes rather than characters.
    pub fn content_length(tile: &Tile) -> bool {
        [&tile.question, &tile.answer]
            .iter()
            .all(|text| text.len() >= Self::MIN_TEXT_BYTES)
    }

    /// `true` when the domain is a non-empty string. No further format
    /// checks are performed.
    pub fn domain_format(tile: &Tile) -> bool {
        !tile.domain.is_empty()
    }

    /// `true` when the tile was created less than 30 days before `now`.
    ///
    /// The subtraction saturates, so a `created_at` later than `now` counts
    /// as age zero and therefore as fresh.
    pub fn freshness(tile: &Tile, now: u64) -> bool {
        let age_secs = now.saturating_sub(tile.created_at);
        age_secs < Self::MAX_AGE_SECS
    }

    /// Runs every check and reports the outcome.
    ///
    /// Returns `(passed, failures)`, where `failures` holds the names of the
    /// failed checks in the fixed order `"confidence"`, `"content_length"`,
    /// `"domain_format"`, `"freshness"`, and `passed` is `true` exactly when
    /// that list is empty.
    pub fn validate_all(tile: &Tile, now: u64) -> (bool, Vec<&'static str>) {
        let outcomes: [(&'static str, bool); 4] = [
            ("confidence", Self::confidence(tile)),
            ("content_length", Self::content_length(tile)),
            ("domain_format", Self::domain_format(tile)),
            ("freshness", Self::freshness(tile, now)),
        ];
        let failures: Vec<&'static str> = outcomes
            .iter()
            .filter(|(_, passed)| !*passed)
            .map(|(name, _)| *name)
            .collect();
        (failures.is_empty(), failures)
    }
}
/// Relevance scoring of a [`Tile`] against a free-text query.
pub struct TileScorer;

impl TileScorer {
    /// Scores `tile` for `query` at time `now`.
    ///
    /// Returns `0.0` when the keyword overlap is below `0.01`. Otherwise the
    /// result is the weighted sum
    /// `keyword*0.30 + confidence*0.25 + domain*0.20 + temporal*0.15 +
    /// frequency*0.10 + controversy*0.10`, where the domain signal is the
    /// fixed value `0.8`.
    pub fn score(tile: &Tile, query: &str, now: u64) -> f64 {
        let keyword = Self::keyword_match(tile, query);
        if keyword < 0.01 {
            return 0.0;
        }

        // The domain signal is a constant here; it is not derived from the tile.
        let domain = 0.8;
        let weighted_terms = [
            keyword * 0.30,
            tile.confidence * 0.25,
            domain * 0.20,
            Self::temporal_signal(tile, now) * 0.15,
            Self::frequency_signal(tile) * 0.10,
            Self::controversy_signal(tile) * 0.10,
        ];
        // Left-to-right accumulation, starting from the first term.
        weighted_terms
            .iter()
            .skip(1)
            .fold(weighted_terms[0], |total, term| total + term)
    }

    /// Share of distinct whitespace-separated words common to the query and
    /// to the tile's question + answer, divided by the size of the larger of
    /// the two word sets. Comparison is exact: case and attached punctuation
    /// are significant. Returns `0.0` if either side has no words.
    fn keyword_match(tile: &Tile, query: &str) -> f64 {
        use std::collections::HashSet;

        let query_terms: HashSet<&str> = query.split_whitespace().collect();
        let mut tile_terms: HashSet<&str> = tile.question.split_whitespace().collect();
        tile_terms.extend(tile.answer.split_whitespace());

        if query_terms.is_empty() || tile_terms.is_empty() {
            return 0.0;
        }

        let shared = query_terms
            .iter()
            .filter(|term| tile_terms.contains(*term))
            .count();
        let larger = usize::max(query_terms.len(), tile_terms.len());
        shared as f64 / larger as f64
    }

    /// Exponential decay on the time since `refreshed_at`: `1.0` when just
    /// refreshed, halving every 168 hours. A `refreshed_at` later than `now`
    /// saturates to age zero.
    fn temporal_signal(tile: &Tile, now: u64) -> f64 {
        let age_secs = now.saturating_sub(tile.refreshed_at);
        let age_hours = age_secs as f64 / 3600.0;
        0.5_f64.powf(age_hours / 168.0)
    }

    /// `use_count / 10`, capped at `1.0`.
    fn frequency_signal(tile: &Tile) -> f64 {
        let capped_uses = (tile.use_count as f64).min(10.0);
        capped_uses / 10.0
    }

    /// `0.05` per challenge, capped at `0.3`; `0.0` for an unchallenged tile.
    fn controversy_signal(tile: &Tile) -> f64 {
        match tile.challenge_count {
            0 => 0.0,
            challenges => (challenges as f64 * 0.05).min(0.3),
        }
    }
}
/// One stored revision of a [`Tile`], as kept by [`TileStore`].
#[derive(Debug, Clone)]
pub struct StoredTile {
    /// The tile content of this revision.
    pub tile: Tile,
    /// Revision number; the first revision of an id is `1`.
    pub version: u32,
    /// The preceding revision number, or `None` for the first revision.
    pub parent_version: Option<u32>,
    /// The `now` value that was passed to [`TileStore::insert`].
    pub stored_at: u64,
}
/// In-memory, append-only version history for tiles, keyed by tile id.
///
/// Implements `Default` (an empty store) and `Debug` so it can be embedded
/// in derived types and inspected in diagnostics.
#[derive(Debug, Default)]
pub struct TileStore {
    /// Tile id -> revisions in insertion order (oldest first).
    tiles: HashMap<String, Vec<StoredTile>>,
}

impl TileStore {
    /// Creates an empty store; equivalent to `TileStore::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `tile` as the next revision of `tile.id` and returns the
    /// revision number assigned to it (starting at `1`).
    ///
    /// `now` is recorded verbatim as the revision's `stored_at`.
    pub fn insert(&mut self, tile: Tile, now: u64) -> u32 {
        let version = self.next_version(&tile.id);
        let parent_version = if version > 1 { Some(version - 1) } else { None };
        let key = tile.id.clone();
        self.tiles.entry(key).or_default().push(StoredTile {
            tile,
            version,
            parent_version,
            stored_at: now,
        });
        version
    }

    /// Revision number the next insert for `id` will receive: one past the
    /// latest stored revision, or `1` when nothing is stored for `id`.
    fn next_version(&self, id: &str) -> u32 {
        self.tiles
            .get(id)
            .and_then(|history| history.last())
            .map_or(1, |latest| latest.version + 1)
    }

    /// Most recently inserted revision of `id`, if any.
    pub fn get_latest(&self, id: &str) -> Option<&StoredTile> {
        self.tiles.get(id)?.last()
    }

    /// The revision of `id` whose number equals `version`, if it exists.
    pub fn get_version(&self, id: &str, version: u32) -> Option<&StoredTile> {
        self.tiles
            .get(id)?
            .iter()
            .find(|stored| stored.version == version)
    }

    /// Number of revisions stored for `id` (`0` for an unknown id).
    pub fn version_count(&self, id: &str) -> usize {
        self.tiles.get(id).map_or(0, Vec::len)
    }

    /// Latest revision of every id. The order follows `HashMap` iteration
    /// and is therefore unspecified.
    pub fn all_latest(&self) -> Vec<&StoredTile> {
        self.tiles
            .values()
            .filter_map(|history| history.last())
            .collect()
    }
}
/// Duplicate detection between tiles.
pub struct TileDedup;

impl TileDedup {
    /// `true` when the two tiles have identical question and answer, or when
    /// the Jaccard similarity of their questions' word sets exceeds `0.9`
    /// (answers are not considered in that second case).
    pub fn is_duplicate(a: &Tile, b: &Tile) -> bool {
        let identical = a.question == b.question && a.answer == b.answer;
        identical || Self::jaccard(&a.question, &b.question) > 0.9
    }

    /// Jaccard index of the distinct whitespace-separated words of `a` and
    /// `b`. Two word-less inputs score `1.0`; exactly one word-less input
    /// scores `0.0`. Words are compared exactly (case and punctuation count).
    fn jaccard(a: &str, b: &str) -> f64 {
        use std::collections::HashSet;

        let left: HashSet<&str> = a.split_whitespace().collect();
        let right: HashSet<&str> = b.split_whitespace().collect();

        match (left.is_empty(), right.is_empty()) {
            (true, true) => 1.0,
            (true, false) | (false, true) => 0.0,
            (false, false) => {
                let shared = left.iter().filter(|word| right.contains(*word)).count();
                // |A ∪ B| = |A| + |B| - |A ∩ B|
                let combined = left.len() + right.len() - shared;
                shared as f64 / combined as f64
            }
        }
    }

    /// Keeps, in input order, every tile that is not a duplicate of a tile
    /// already kept; the first occurrence of each duplicate group wins.
    pub fn dedup_batch(tiles: &[Tile]) -> Vec<&Tile> {
        tiles.iter().fold(Vec::<&Tile>::new(), |mut kept, candidate| {
            let already_seen = kept
                .iter()
                .any(|earlier| Self::is_duplicate(earlier, candidate));
            if !already_seen {
                kept.push(candidate);
            }
            kept
        })
    }
}
/// Field-level comparison between two tiles.
pub struct TileVersion;

impl TileVersion {
    /// Lists which of question, answer, domain and confidence differ between
    /// `a` and `b`.
    ///
    /// The labels are `"question_changed"`, `"answer_changed"`,
    /// `"domain_changed"` and `"confidence_changed"`, always in that order.
    /// Confidence counts as changed only when the absolute difference
    /// exceeds `0.01`. No other fields are compared.
    pub fn diff(a: &Tile, b: &Tile) -> Vec<String> {
        let confidence_delta = (a.confidence - b.confidence).abs();
        let comparisons: [(bool, &'static str); 4] = [
            (a.question != b.question, "question_changed"),
            (a.answer != b.answer, "answer_changed"),
            (a.domain != b.domain, "domain_changed"),
            (confidence_delta > 0.01, "confidence_changed"),
        ];
        comparisons
            .iter()
            .filter(|(differs, _)| *differs)
            .map(|(_, label)| String::from(*label))
            .collect()
    }
}
/// Dependency lookups across a set of tiles.
pub struct TileCascade;

impl TileCascade {
    /// Returns, in input order, every tile in `all_tiles` whose
    /// `dependencies` list contains `tile_id`.
    ///
    /// Only direct dependents are reported; the lookup is not transitive.
    pub fn dependents_of<'a>(tile_id: &str, all_tiles: &'a [Tile]) -> Vec<&'a Tile> {
        all_tiles
            .iter()
            // Compare as `&str` so no `String` is allocated per tile scanned.
            .filter(|tile| tile.dependencies.iter().any(|dep| dep.as_str() == tile_id))
            .collect()
    }
}
/// Ingestion and retrieval front-end: validates incoming tiles, scores them,
/// flags duplicates, stores them as versions and answers ranked searches.
pub struct Pipeline {
    /// Versioned storage keyed by tile id.
    store: TileStore,
    /// Every accepted tile in arrival order, one entry per accepted
    /// submission (so an id that was re-submitted appears more than once).
    tiles: Vec<Tile>,
}

impl Default for Pipeline {
    /// An empty pipeline; same as [`Pipeline::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Pipeline {
    /// Creates a pipeline with an empty store and no accepted tiles.
    pub fn new() -> Self {
        Pipeline {
            store: TileStore::new(),
            tiles: Vec::new(),
        }
    }

    /// Validates, scores and stores `tile`.
    ///
    /// Returns [`PipelineResult::Rejected`] with the failed check names when
    /// validation fails; nothing is stored in that case. Otherwise the tile
    /// is stored as the next version of its id and
    /// [`PipelineResult::Accepted`] carries its id, its score for `query`,
    /// the assigned version and the duplicate flag.
    ///
    /// A duplicate is flagged but still stored. The duplicate check runs
    /// against every previously accepted tile, including earlier versions of
    /// the same id.
    pub fn process(&mut self, tile: Tile, query: &str, now: u64) -> PipelineResult {
        let (valid, failures) = ValidationGate::validate_all(&tile, now);
        if !valid {
            return PipelineResult::Rejected { failures };
        }

        let score = TileScorer::score(&tile, query, now);
        let is_duplicate = self
            .tiles
            .iter()
            .any(|existing| TileDedup::is_duplicate(existing, &tile));

        // The store and the arrival log each own a copy; the second owner
        // takes the original, so only one full clone is needed.
        let tile_id = tile.id.clone();
        let version = self.store.insert(tile.clone(), now);
        self.tiles.push(tile);

        PipelineResult::Accepted {
            tile_id,
            score,
            version,
            is_duplicate,
        }
    }

    /// Scores the latest version of every stored tile against `query` and
    /// returns the `(score, tile)` pairs with a score above zero, best match
    /// first. Equal scores are ordered by tile id so the result is
    /// deterministic.
    pub fn search(&self, query: &str, now: u64) -> Vec<(f64, &Tile)> {
        let mut hits: Vec<(f64, &Tile)> = self
            .store
            .all_latest()
            .into_iter()
            .map(|stored| (TileScorer::score(&stored.tile, query, now), &stored.tile))
            .filter(|(score, _)| *score > 0.0)
            .collect();

        // The store iterates in arbitrary order, so ranking must be explicit.
        // NaN scores never reach this point (they fail the `> 0.0` filter),
        // so the `Equal` fallback is only a safeguard.
        hits.sort_by(|left, right| {
            right
                .0
                .partial_cmp(&left.0)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| left.1.id.cmp(&right.1.id))
        });
        hits
    }

    /// Latest stored version of the tile with the given id, if any.
    pub fn get(&self, id: &str) -> Option<&StoredTile> {
        self.store.get_latest(id)
    }

    /// Number of versions stored for `id` (`0` for an unknown id).
    pub fn history(&self, id: &str) -> usize {
        self.store.version_count(id)
    }
}
/// Outcome of [`Pipeline::process`].
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged,
/// stored and compared (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq)]
pub enum PipelineResult {
    /// The tile passed validation and was stored.
    Accepted {
        /// Id of the stored tile.
        tile_id: String,
        /// Relevance score of the tile for the query given to `process`.
        score: f64,
        /// Version number assigned by the store.
        version: u32,
        /// Whether the tile duplicated a previously accepted tile.
        is_duplicate: bool,
    },
    /// The tile failed validation and was not stored.
    Rejected {
        /// Names of the validation checks that failed.
        failures: Vec<&'static str>,
    },
}
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
fn now() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
/// Builds a [`Tile`] from the given id, question, answer, domain and
/// confidence, filling everything else with fixed starting values.
///
/// `created_at` and `refreshed_at` are both set to a single reading of
/// [`now`]; polarity is neutral, counters are zero, `success_rate` is `1.0`,
/// provenance is the default, and tags and dependencies are empty.
fn make_tile(id: &str, q: &str, a: &str, domain: &str, conf: f64) -> Tile {
    let timestamp = now();
    Tile {
        id: String::from(id),
        question: String::from(q),
        answer: String::from(a),
        domain: String::from(domain),
        confidence: conf,
        polarity: Polarity::Neutral,
        tags: Vec::new(),
        created_at: timestamp,
        refreshed_at: timestamp,
        use_count: 0,
        success_rate: 1.0,
        challenge_count: 0,
        provenance: Provenance::default(),
        dependencies: Vec::new(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed tile is accepted, scores above zero and becomes version 1.
    #[test]
    fn test_full_pipeline_accept_valid_tile() {
        let mut pipeline = Pipeline::new();
        let candidate = make_tile(
            "t1",
            "What is constraint theory?",
            "Geometric snapping for deterministic computation.",
            "constraint_theory",
            0.9,
        );
        if let PipelineResult::Accepted { score, version, .. } =
            pipeline.process(candidate, "constraint theory", now())
        {
            assert!(score > 0.0);
            assert_eq!(version, 1);
        } else {
            panic!("Valid tile should be accepted");
        }
    }

    /// Confidence below the gate threshold is reported as a failure.
    #[test]
    fn test_full_pipeline_reject_low_confidence() {
        let mut pipeline = Pipeline::new();
        let candidate = make_tile("t2", "Short", "Short", "test", 0.1);
        match pipeline.process(candidate, "test", now()) {
            PipelineResult::Accepted { .. } => panic!("Low confidence tile should be rejected"),
            PipelineResult::Rejected { failures } => {
                assert!(failures.contains(&"confidence"));
            }
        }
    }

    /// Too-short question/answer text is reported as a failure.
    #[test]
    fn test_full_pipeline_reject_short_content() {
        let mut pipeline = Pipeline::new();
        let candidate = make_tile("t3", "Q?", "A.", "test", 0.9);
        match pipeline.process(candidate, "test", now()) {
            PipelineResult::Accepted { .. } => panic!("Short content should be rejected"),
            PipelineResult::Rejected { failures } => {
                assert!(failures.contains(&"content_length"));
            }
        }
    }

    /// Re-submitting an id appends a new version and `get` returns the newest.
    #[test]
    fn test_full_pipeline_versioning() {
        let mut pipeline = Pipeline::new();

        let first = make_tile("v1", "What is PLATO?", "Training pipeline for agents.", "plato", 0.9);
        pipeline.process(first, "plato", now());
        assert_eq!(pipeline.history("v1"), 1);

        let second = make_tile(
            "v1",
            "What is PLATO?",
            "Training pipeline with tiles, rooms, and ensigns.",
            "plato",
            0.95,
        );
        pipeline.process(second, "plato", now());
        assert_eq!(pipeline.history("v1"), 2);

        let newest = pipeline.get("v1").unwrap();
        assert_eq!(newest.version, 2);
        assert!(newest.tile.answer.contains("ensigns"));
    }

    /// Identical content under a different id is accepted but flagged.
    #[test]
    fn test_full_pipeline_dedup_detection() {
        let mut pipeline = Pipeline::new();
        let original = make_tile("d1", "What is deadband?", "P0/P1/P2 priority protocol.", "deadband", 0.9);
        pipeline.process(original, "deadband", now());

        let copy = make_tile("d2", "What is deadband?", "P0/P1/P2 priority protocol.", "deadband", 0.9);
        if let PipelineResult::Accepted { is_duplicate, .. } = pipeline.process(copy, "deadband", now()) {
            assert!(is_duplicate);
        } else {
            panic!("Should accept but flag as dup");
        }
    }

    /// The tile containing the query words comes back first from `search`.
    #[test]
    fn test_full_pipeline_search_ranks_correctly() {
        let mut pipeline = Pipeline::new();
        let submissions = vec![
            (make_tile("s1", "What is flux?", "Bytecode runtime.", "flux", 0.8), "flux"),
            (make_tile("s2", "What is fishing?", "Catching fish.", "fishing", 0.9), "flux"),
            (
                make_tile("s3", "What is flux runtime?", "Deterministic bytecode VM.", "flux", 0.9),
                "flux runtime",
            ),
        ];
        for (tile, query) in submissions {
            pipeline.process(tile, query, now());
        }

        let results = pipeline.search("flux runtime", now());
        assert!(!results.is_empty());
        assert_eq!(results[0].1.id, "s3");
    }

    /// A challenged tile still surfaces with a score above 0.1.
    #[test]
    fn test_full_pipeline_controversy_boost() {
        let mut pipeline = Pipeline::new();
        let challenged = Tile {
            challenge_count: 5,
            ..make_tile("c1", "What is LoRA?", "Low-rank adaptation for fine-tuning.", "training", 0.9)
        };
        pipeline.process(challenged, "lora training adaptation", now());

        let results = pipeline.search("lora training adaptation", now());
        assert!(!results.is_empty());
        assert!(results[0].0 > 0.1);
    }

    /// `dependents_of` finds exactly the tiles that list the given id.
    #[test]
    fn test_full_pipeline_dependency_cascade() {
        let mut pipeline = Pipeline::new();

        let upstream = Tile {
            dependencies: vec!["dep0".into()],
            ..make_tile("dep1", "What is a tile?", "Atomic knowledge unit.", "plato", 0.9)
        };
        pipeline.process(upstream, "tile", now());

        let downstream = Tile {
            dependencies: vec!["dep1".into()],
            ..make_tile("dep2", "What is a room?", "Tile collection.", "plato", 0.9)
        };
        pipeline.process(downstream, "room", now());

        let dependents = TileCascade::dependents_of("dep1", &pipeline.tiles);
        assert_eq!(dependents.len(), 1);
        assert_eq!(dependents[0].id, "dep2");
    }

    /// Ten valid tiles are all accepted and a broad query finds several.
    #[test]
    fn test_full_pipeline_ten_tiles() {
        let mut pipeline = Pipeline::new();
        let batch = vec![
            make_tile("batch1", "What is constraint theory?", "Geometric snapping.", "constraint_theory", 0.9),
            make_tile("batch2", "What is deadband protocol?", "P0 rocks, P1 channels, P2 optimize.", "deadband", 0.85),
            make_tile("batch3", "What is a holodeck?", "MUD environment for agents.", "holodeck", 0.8),
            make_tile("batch4", "What is flux?", "Deterministic bytecode runtime.", "flux", 0.9),
            make_tile("batch5", "What is an ensign?", "Compressed expertise adapter.", "plato", 0.85),
            make_tile("batch6", "What is a tile?", "Atomic Q/A knowledge unit.", "plato", 0.95),
            make_tile("batch7", "What is a room?", "Thematic tile collection.", "plato", 0.9),
            make_tile("batch8", "What is the forge?", "GPU training pipeline.", "training", 0.8),
            make_tile("batch9", "What is lab guard?", "Hypothesis gating.", "training", 0.85),
            make_tile("batch10", "What is ghost tile?", "Decayed inactive knowledge.", "plato", 0.8),
        ];

        let accepted = batch
            .into_iter()
            .map(|tile| pipeline.process(tile, "plato training", now()))
            .filter(|outcome| match outcome {
                PipelineResult::Accepted { .. } => true,
                PipelineResult::Rejected { .. } => false,
            })
            .count();
        assert_eq!(accepted, 10);

        let results = pipeline.search("plato tile room ensign training knowledge", now());
        assert!(results.len() >= 2);
    }

    /// The confidence gate separates 0.9 from 0.1.
    #[test]
    fn test_validation_gate_confidence() {
        let good = make_tile("g", "What is PLATO?", "Training pipeline.", "plato", 0.9);
        let bad = make_tile("b", "What?", "Low conf.", "plato", 0.1);
        assert!(ValidationGate::confidence(&good));
        assert!(!ValidationGate::confidence(&bad));
    }

    /// Without keyword overlap the score collapses to (near) zero.
    #[test]
    fn test_scorer_keyword_gating() {
        let tile = make_tile("kg", "What is quantum computing?", "Qubits and superposition.", "physics", 0.9);
        let score_relevant = TileScorer::score(&tile, "quantum computing qubits", now());
        let score_irrelevant = TileScorer::score(&tile, "fishing boats anchors", now());
        assert!(score_relevant > 0.1);
        assert!(score_irrelevant < 0.01);
    }

    /// Exact copies are duplicates; a batch of three with one copy keeps two.
    #[test]
    fn test_dedup_exact_and_fuzzy() {
        let a = make_tile("da", "What is flux?", "Bytecode runtime for agents.", "flux", 0.9);
        let b = make_tile("db", "What is flux?", "Bytecode runtime for agents.", "flux", 0.9);
        let c = make_tile("dc", "What is flux runtime?", "Bytecode VM for deterministic execution.", "flux", 0.9);
        assert!(TileDedup::is_duplicate(&a, &b));

        let batch = vec![a, b, c];
        let survivors = TileDedup::dedup_batch(&batch);
        assert_eq!(survivors.len(), 2);
    }

    /// `diff` names the answer and domain changes.
    #[test]
    fn test_version_diff() {
        let a = make_tile("va", "Q?", "A1", "d", 0.9);
        let b = make_tile("vb", "Q?", "A2", "d", 0.9);
        let c = make_tile("vc", "Q?", "A2", "d2", 0.9);

        let diff_ab = TileVersion::diff(&a, &b);
        assert!(diff_ab.iter().any(|change| change == "answer_changed"));

        let diff_bc = TileVersion::diff(&b, &c);
        assert!(diff_bc.iter().any(|change| change == "domain_changed"));
    }
}