use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// A vector offset in embedding space tied to a concept anchor, together with
/// metadata describing its strength, origin and creation time.
///
/// Serializable with serde; `tags` may be omitted from serialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticDelta {
/// Unique identifier. `SemanticDelta::new` fills this with a random UUID v4 string.
pub id: String,
/// Label of the concept this delta is attached to (free-form string supplied by the caller).
pub concept_anchor: String,
/// Per-dimension offset. `distill_learning` produces it as
/// `after.centroid - before.centroid`, element by element.
pub embedding_delta: Vec<f32>,
/// Weight of the delta. `distill_learning` caps it at `1.0`, `is_significant`
/// requires at least `0.3`, and `apply_delta_to_embedding` multiplies the offset by it.
pub strength: f32,
/// Where this delta came from.
pub source: DeltaSource,
/// Creation time. `SemanticDelta::new` sets it to `Utc::now()`.
pub timestamp: DateTime<Utc>,
/// Optional labels. Empty when the field is absent from serialized input.
#[serde(default)]
pub tags: Vec<String>,
}
impl SemanticDelta {
    /// Lowest `strength` accepted by [`is_significant`](Self::is_significant).
    const SIGNIFICANT_STRENGTH_FLOOR: f32 = 0.3;

    /// Creates a delta with a freshly generated UUID v4 `id`, the current UTC
    /// time as `timestamp`, and no tags.
    pub fn new(
        concept_anchor: impl Into<String>,
        embedding_delta: Vec<f32>,
        strength: f32,
        source: DeltaSource,
    ) -> Self {
        let id = uuid::Uuid::new_v4().to_string();
        let concept_anchor = concept_anchor.into();
        let timestamp = Utc::now();
        Self {
            id,
            concept_anchor,
            embedding_delta,
            strength,
            source,
            timestamp,
            tags: Vec::new(),
        }
    }

    /// Builder-style setter: replaces any existing tags with `tags`.
    pub fn with_tags(self, tags: Vec<String>) -> Self {
        Self { tags, ..self }
    }

    /// Euclidean (L2) norm of `embedding_delta`.
    pub fn magnitude(&self) -> f32 {
        let squared_norm: f32 = self
            .embedding_delta
            .iter()
            .map(|component| component * component)
            .sum();
        squared_norm.sqrt()
    }

    /// Returns `true` when the magnitude is at least `threshold` *and* the
    /// strength is at least `0.3`.
    pub fn is_significant(&self, threshold: f32) -> bool {
        // Both checks are written as `>=` so that a NaN on either side
        // compares false and the delta is treated as not significant.
        let strong_enough = self.strength >= Self::SIGNIFICANT_STRENGTH_FLOOR;
        let large_enough = self.magnitude() >= threshold;
        large_enough && strong_enough
    }

    /// Returns a copy in which every component of `embedding_delta` and the
    /// `strength` are multiplied by `factor`.
    ///
    /// The copy keeps the original `id`, `concept_anchor`, `source`,
    /// `timestamp` and `tags`.
    pub fn scaled(&self, factor: f32) -> Self {
        let mut copy = self.clone();
        for component in &mut copy.embedding_delta {
            *component *= factor;
        }
        copy.strength *= factor;
        copy
    }
}
/// Origin of a `SemanticDelta`.
///
/// NOTE(review): only `SelfLearning` has a producer in this file
/// (`distill_learning`); the descriptions of the other variants are inferred
/// from their names and should be confirmed against their producers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum DeltaSource {
/// Produced locally. `distill_learning` emits this variant, storing its
/// `task_context` argument.
SelfLearning {
/// Caller-supplied description of the task context.
task_context: String,
},
/// Presumably a delta received from another instance — TODO confirm.
PeerLearning {
/// Presumably the identifier of the sharing instance — TODO confirm.
instance_id: String,
/// Presumably the time at which the peer shared the delta — TODO confirm.
shared_at: DateTime<Utc>,
},
/// Presumably a delta created by a data/version migration — TODO confirm.
Migration {
/// Version string associated with the migration.
version: String,
},
/// Presumably a delta contributed by a skill package — TODO confirm.
SkillBox {
/// Identifier of the skill.
skill_id: String,
},
/// Presumably a delta derived from ingesting a document — TODO confirm.
DocumentIngestion {
/// Identifier of the document.
document_id: String,
},
}
/// Tunable thresholds for distilling and sharing semantic deltas.
///
/// The `Default` implementation uses: magnitude `0.05`, strength `0.3`,
/// max age 7 days, peer scale `0.5`, auto-share off.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistillationConfig {
/// `distill_learning` returns `None` when the delta's Euclidean norm is below this value.
pub min_share_magnitude: f32,
/// `distill_learning` returns `None` when the computed strength is below this value.
pub min_share_strength: f32,
/// Maximum delta age in seconds.
/// NOTE(review): not read anywhere in the visible code — confirm where it is enforced.
pub max_delta_age_secs: u64,
/// NOTE(review): presumably the scale applied when ingesting peer deltas
/// (e.g. the `scale` argument of `apply_delta_to_embedding`) — not visible here; confirm.
pub peer_ingestion_scale: f32,
/// NOTE(review): presumably enables sharing without explicit request —
/// consumer not visible in this file; confirm.
pub auto_share: bool,
}
impl Default for DistillationConfig {
    /// Default thresholds: minimum magnitude `0.05`, minimum strength `0.3`,
    /// a seven-day maximum delta age, peer ingestion scale `0.5`, and
    /// auto-sharing disabled.
    fn default() -> Self {
        const SECS_PER_DAY: u64 = 86_400;
        const DEFAULT_MAX_AGE_DAYS: u64 = 7;

        Self {
            min_share_magnitude: 0.05,
            min_share_strength: 0.3,
            max_delta_age_secs: SECS_PER_DAY * DEFAULT_MAX_AGE_DAYS,
            peer_ingestion_scale: 0.5,
            auto_share: false,
        }
    }
}
/// Point-in-time summary used as input to `distill_learning`: a centroid
/// vector, an entry count, and optionally per-entry embeddings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThermogramSnapshot {
/// Caller-supplied identifier of this snapshot.
pub id: String,
/// Centroid vector. `distill_learning` requires the before/after centroids
/// to have the same length.
pub centroid: Vec<f32>,
/// Number of entries summarized; `distill_learning` uses the after/before
/// ratio of this count when computing strength.
pub entry_count: usize,
/// Capture time. `ThermogramSnapshot::from_centroid` sets it to `Utc::now()`.
pub timestamp: DateTime<Utc>,
/// Optional `(String, Vec<f32>)` pairs — presumably (entry key, embedding);
/// TODO confirm. Empty when absent from serialized input.
#[serde(default)]
pub entry_embeddings: Vec<(String, Vec<f32>)>,
}
impl ThermogramSnapshot {
    /// Builds a snapshot from a precomputed centroid and entry count.
    ///
    /// The timestamp is the current UTC time and `entry_embeddings` starts empty.
    pub fn from_centroid(id: impl Into<String>, centroid: Vec<f32>, entry_count: usize) -> Self {
        let id = id.into();
        let timestamp = Utc::now();
        Self {
            id,
            centroid,
            entry_count,
            timestamp,
            entry_embeddings: Vec::new(),
        }
    }

    /// Builder-style setter: replaces `entry_embeddings` with `entries`.
    pub fn with_entries(self, entries: Vec<(String, Vec<f32>)>) -> Self {
        Self {
            entry_embeddings: entries,
            ..self
        }
    }
}
/// Distills the change between two snapshots into a shareable [`SemanticDelta`].
///
/// The delta vector is `after.centroid - before.centroid`, element by element.
/// Its strength is the vector's Euclidean norm multiplied by the entry growth
/// ratio `after.entry_count / max(before.entry_count, 1)`, capped at `1.0`.
/// The resulting delta is tagged [`DeltaSource::SelfLearning`] with the given
/// `task_context`.
///
/// # Returns
///
/// `None` when any of the following holds:
/// - the two centroids differ in length;
/// - the norm of the difference is NaN or infinite (i.e. a snapshot contains
///   non-finite values, or the difference overflows `f32`);
/// - the norm is below `config.min_share_magnitude`;
/// - the computed strength is below `config.min_share_strength`.
///
/// Otherwise `Some(delta)`.
pub fn distill_learning(
    before: &ThermogramSnapshot,
    after: &ThermogramSnapshot,
    concept_anchor: impl Into<String>,
    task_context: impl Into<String>,
    config: &DistillationConfig,
) -> Option<SemanticDelta> {
    if before.centroid.len() != after.centroid.len() {
        return None;
    }

    let embedding_delta: Vec<f32> = before
        .centroid
        .iter()
        .zip(&after.centroid)
        .map(|(b, a)| a - b)
        .collect();

    let magnitude = embedding_delta.iter().map(|x| x * x).sum::<f32>().sqrt();

    // A NaN norm compares false against every threshold, and `f32::min`
    // discards a NaN operand, so without this guard a corrupt snapshot would
    // be emitted as a delta with strength 1.0. A finite norm also guarantees
    // every component of the delta is finite.
    if !magnitude.is_finite() || magnitude < config.min_share_magnitude {
        return None;
    }

    // `max(1)` keeps the ratio defined when the starting snapshot is empty.
    let entry_ratio = (after.entry_count as f32) / (before.entry_count.max(1) as f32);
    let strength = (magnitude * entry_ratio).min(1.0);
    if strength < config.min_share_strength {
        return None;
    }

    Some(SemanticDelta::new(
        concept_anchor,
        embedding_delta,
        strength,
        DeltaSource::SelfLearning {
            task_context: task_context.into(),
        },
    ))
}
/// Shifts `embedding` by `delta`, returning a new vector.
///
/// Each output component is `e + d * scale * delta.strength`, where `e` and
/// `d` are the matching components of `embedding` and `delta.embedding_delta`.
///
/// If the two vectors differ in length the delta is not applied and an
/// unmodified copy of `embedding` is returned.
pub fn apply_delta_to_embedding(embedding: &[f32], delta: &SemanticDelta, scale: f32) -> Vec<f32> {
    let offsets = &delta.embedding_delta;
    if embedding.len() != offsets.len() {
        return embedding.to_vec();
    }

    let mut shifted = Vec::with_capacity(embedding.len());
    for (base, offset) in embedding.iter().zip(offsets) {
        // Multiplication order is kept as-is so results stay bit-identical.
        shifted.push(base + offset * scale * delta.strength);
    }
    shifted
}
/// Cosine similarity of two vectors, in the range `[-1.0, 1.0]`.
///
/// # Returns
///
/// - `0.0` when the slices differ in length, are empty, or either one is the
///   zero vector (similarity is undefined there; `0.0` is the neutral value).
/// - Otherwise `dot(a, b) / (|a| * |b|)`, clamped to `[-1.0, 1.0]`.
///
/// NaN components propagate to a NaN result.
///
/// Sums are accumulated in `f64`: squaring `f32` components around `1e20`
/// overflows `f32` (which previously produced NaN), and squaring components
/// around `1e-30` underflows to zero (which previously reported `0.0` for
/// parallel vectors). Neither can happen in `f64` for any finite `f32` input.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Single pass: dot product and both squared norms.
    let (dot, norm_sq_a, norm_sq_b) = a.iter().zip(b).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot_acc, sq_a, sq_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot_acc + x * y, sq_a + x * x, sq_b + y * y)
        },
    );

    if norm_sq_a == 0.0 || norm_sq_b == 0.0 {
        return 0.0;
    }

    let cosine = dot / (norm_sq_a.sqrt() * norm_sq_b.sqrt());

    // Rounding can push the quotient marginally outside the mathematical
    // range; clamp so callers can rely on it (e.g. before `acos`). The value
    // is within [-1, 1], so narrowing to f32 cannot overflow.
    cosine.clamp(-1.0, 1.0) as f32
}
/// A group of `SemanticDelta`s originating from one instance, with an
/// optional signature.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaBatch {
/// Unique identifier. `DeltaBatch::new` fills this with a random UUID v4 string.
pub id: String,
/// Identifier of the instance that produced the batch (supplied by the caller of `new`).
pub source_instance: String,
/// The deltas carried by this batch.
pub deltas: Vec<SemanticDelta>,
/// Creation time. `DeltaBatch::new` sets it to `Utc::now()`.
pub created: DateTime<Utc>,
/// Optional signature string: `None` after `new`, set by `with_signature`,
/// and reset to `None` by `filter_significant`. Also `None` when absent from
/// serialized input.
/// NOTE(review): the signing scheme is not visible in this file.
#[serde(default)]
pub signature: Option<String>,
}
impl DeltaBatch {
    /// Creates an unsigned batch with a freshly generated UUID v4 `id` and the
    /// current UTC time as `created`.
    pub fn new(source_instance: impl Into<String>, deltas: Vec<SemanticDelta>) -> Self {
        let id = uuid::Uuid::new_v4().to_string();
        let source_instance = source_instance.into();
        Self {
            id,
            source_instance,
            deltas,
            created: Utc::now(),
            signature: None,
        }
    }

    /// Builder-style setter: attaches `sig` as the batch signature.
    pub fn with_signature(self, sig: impl Into<String>) -> Self {
        Self {
            signature: Some(sig.into()),
            ..self
        }
    }

    /// Returns a copy of this batch containing only the deltas for which
    /// `SemanticDelta::is_significant(threshold)` holds.
    ///
    /// `id`, `source_instance` and `created` are carried over; the signature
    /// is dropped (set to `None`) on the returned batch.
    pub fn filter_significant(&self, threshold: f32) -> Self {
        let mut kept = Vec::new();
        for delta in &self.deltas {
            if delta.is_significant(threshold) {
                kept.push(delta.clone());
            }
        }

        Self {
            id: self.id.clone(),
            source_instance: self.source_instance.clone(),
            deltas: kept,
            created: self.created,
            signature: None,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for approximate float comparisons.
    const EPSILON: f32 = 0.001;

    /// Shorthand for a `SelfLearning` source with the given task context.
    fn self_learning(task_context: &str) -> DeltaSource {
        DeltaSource::SelfLearning {
            task_context: task_context.to_string(),
        }
    }

    /// Asserts that `actual` is within `EPSILON` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < EPSILON,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_semantic_delta_creation() {
        let source = self_learning("learning types");
        let delta = SemanticDelta::new("rust_types", vec![0.1, -0.2, 0.3], 0.8, source);

        assert_eq!(delta.concept_anchor, "rust_types");
        assert_eq!(delta.strength, 0.8);
    }

    #[test]
    fn test_delta_magnitude() {
        // 3-4-5 right triangle: the norm of (3, 4) is 5.
        let delta = SemanticDelta::new("test", vec![3.0, 4.0], 0.5, self_learning("test"));

        assert_close(delta.magnitude(), 5.0);
    }

    #[test]
    fn test_distill_learning() {
        let before = ThermogramSnapshot::from_centroid("snap1", vec![0.0, 0.0, 0.0], 10);
        let after = ThermogramSnapshot::from_centroid("snap2", vec![0.2, 0.2, 0.2], 20);
        let config = DistillationConfig::default();

        let delta = distill_learning(&before, &after, "test_concept", "learning test", &config)
            .expect("a centroid shift of 0.2 per axis should be distilled");

        assert_eq!(delta.concept_anchor, "test_concept");
    }

    #[test]
    fn test_distill_insignificant() {
        // The centroid barely moves, so the norm is below the default minimum.
        let before = ThermogramSnapshot::from_centroid("snap1", vec![0.0, 0.0, 0.0], 10);
        let after = ThermogramSnapshot::from_centroid("snap2", vec![0.001, 0.001, 0.001], 10);
        let config = DistillationConfig::default();

        let delta = distill_learning(&before, &after, "test_concept", "learning test", &config);

        assert!(delta.is_none());
    }

    #[test]
    fn test_cosine_similarity() {
        let x_axis = [1.0_f32, 0.0, 0.0];
        let same = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];
        let opposite = [-1.0_f32, 0.0, 0.0];

        assert_close(cosine_similarity(&x_axis, &same), 1.0);
        assert_close(cosine_similarity(&x_axis, &y_axis), 0.0);
        assert_close(cosine_similarity(&x_axis, &opposite), -1.0);
    }

    #[test]
    fn test_apply_delta() {
        let embedding = [1.0_f32, 2.0, 3.0];
        let delta = SemanticDelta::new("test", vec![0.1, 0.2, 0.3], 1.0, self_learning("test"));

        let result = apply_delta_to_embedding(&embedding, &delta, 1.0);

        assert_close(result[0], 1.1);
        assert_close(result[1], 2.2);
        assert_close(result[2], 3.3);
    }

    #[test]
    fn test_delta_batch() {
        let only_delta = SemanticDelta::new("test", vec![0.1, 0.2], 0.8, self_learning("test"));

        let batch = DeltaBatch::new("instance-1", vec![only_delta]);

        assert_eq!(batch.deltas.len(), 1);
        assert_eq!(batch.source_instance, "instance-1");
    }
}