use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::time::Duration;
use crate::storage::Storage;
use crate::types::MemoryRecord;
/// Serde adapter that stores a `std::time::Duration` as a floating-point
/// number of seconds.
///
/// Meant to be used through `#[serde(with = "duration_secs")]` on `Duration`
/// fields.
mod duration_secs {
    use serde::{self, Deserialize, Deserializer, Serializer};
    use std::time::Duration;

    /// Writes `duration` as an `f64` number of seconds; the fractional part
    /// carries the sub-second component.
    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_f64(duration.as_secs_f64())
    }

    /// Reads an `f64` number of seconds and converts it into a `Duration`.
    ///
    /// # Errors
    ///
    /// Returns a deserialization error when the input is not an `f64`, or when
    /// the value is negative, NaN, infinite, or too large for a `Duration`.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
    where
        D: Deserializer<'de>,
    {
        let secs = f64::deserialize(deserializer)?;
        // `Duration::from_secs_f64` panics on negative, non-finite or
        // overflowing input. Deserialized data is external, so surface those
        // cases as an ordinary serde error instead of aborting the process.
        Duration::try_from_secs_f64(secs).map_err(serde::de::Error::custom)
    }
}
/// Per-signal weights, one for each of the four signal kinds
/// (Hebbian, entity, embedding, temporal).
///
/// NOTE(review): how the weights are combined is not visible in this file —
/// presumably a weighted sum of the pairwise signals; confirm against the
/// clustering implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterWeights {
/// Weight applied to the Hebbian signal.
pub hebbian: f64,
/// Weight applied to the entity signal.
pub entity: f64,
/// Weight applied to the embedding signal.
pub embedding: f64,
/// Weight applied to the temporal signal.
pub temporal: f64,
}
impl Default for ClusterWeights {
fn default() -> Self {
Self {
hebbian: 0.4,
entity: 0.3,
embedding: 0.2,
temporal: 0.1,
}
}
}
/// Signal values measured for one pair of items, one value per signal kind.
///
/// NOTE(review): presumably the pair is two memories being considered for the
/// same cluster — confirm against the code that produces this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PairwiseSignals {
/// Hebbian weight for the pair; optional (presumably `None` when no Hebbian
/// link exists — TODO confirm).
pub hebbian_weight: Option<f64>,
/// Entity-overlap value for the pair.
pub entity_overlap: f64,
/// Embedding-similarity value for the pair.
pub embedding_similarity: f64,
/// Temporal-proximity value for the pair.
pub temporal_proximity: f64,
}
/// Summary of the four signal kinds for a cluster: which one dominates, plus a
/// contribution figure for each.
///
/// NOTE(review): the scale of the contribution values (fractions vs. raw
/// scores) is not established in this file — confirm with the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalsSummary {
/// The signal kind reported as dominant.
pub dominant_signal: ClusterSignal,
/// Contribution attributed to the Hebbian signal.
pub hebbian_contribution: f64,
/// Contribution attributed to the entity signal.
pub entity_contribution: f64,
/// Contribution attributed to the embedding signal.
pub embedding_contribution: f64,
/// Contribution attributed to the temporal signal.
pub temporal_contribution: f64,
}
/// Identifies one of the four signal kinds; the variants mirror the fields of
/// `ClusterWeights`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ClusterSignal {
/// The Hebbian signal.
Hebbian,
/// The entity signal.
Entity,
/// The embedding signal.
Embedding,
/// The temporal signal.
Temporal,
}
/// A discovered cluster: its identifier, member list, quality score, centroid
/// and per-signal summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryCluster {
/// Identifier of the cluster.
pub id: String,
/// Member identifiers (presumably memory IDs — TODO confirm).
pub members: Vec<String>,
/// Quality score of the cluster; range is not established in this file.
pub quality_score: f64,
/// Identifier of the centroid (presumably one of `members` — TODO confirm).
pub centroid_id: String,
/// Per-signal summary for this cluster.
pub signals_summary: SignalsSummary,
}
/// Settings for cluster discovery.
///
/// The six trailing `Option` fields carry `#[serde(default)]`, so they may be
/// omitted from serialized input and then deserialize to `None`. All other
/// fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterDiscoveryConfig {
/// Per-signal weights.
pub weights: ClusterWeights,
/// Clustering threshold (presumably a minimum combined score — TODO confirm).
pub cluster_threshold: f64,
/// Minimum number of members in a cluster.
pub min_cluster_size: usize,
/// Maximum number of members in a cluster.
pub max_cluster_size: usize,
/// Minimum importance (presumably memories below it are ignored — TODO confirm).
pub min_importance: f64,
/// Decay constant for the temporal signal.
/// NOTE(review): the default pair (0.00413, 168.0 hours) is close to
/// ln(2)/168, which suggests a per-hour rate tied to the half-life — confirm.
pub temporal_decay_lambda: f64,
/// Half-life for the temporal signal, in hours.
pub temporal_half_life_hours: f64,
/// Cooldown expressed in cycles; what constitutes a cycle is defined elsewhere.
pub cooldown_cycles: u32,
/// Minimum temporal spread; serialized as `f64` seconds via `duration_secs`.
#[serde(with = "duration_secs")]
pub temporal_spread_minimum: Duration,
/// Optional cap on neighbors per node.
#[serde(default)]
pub max_neighbors_per_node: Option<usize>,
/// Optional Infomap trial count.
#[serde(default)]
pub infomap_trials: Option<usize>,
/// Optional switch for hierarchical Infomap.
#[serde(default)]
pub infomap_hierarchical: Option<bool>,
/// Optional "hot" assignment threshold; semantics defined by the consumer.
#[serde(default)]
pub hot_assign_threshold: Option<f64>,
/// Optional "cold" recluster ratio; semantics defined by the consumer.
#[serde(default)]
pub cold_recluster_ratio: Option<f64>,
/// Optional "warm" recluster interval; unit is not established in this file.
#[serde(default)]
pub warm_recluster_interval: Option<usize>,
}
impl Default for ClusterDiscoveryConfig {
fn default() -> Self {
Self {
weights: ClusterWeights::default(),
cluster_threshold: 0.3,
min_cluster_size: 3,
max_cluster_size: 15,
min_importance: 0.3,
temporal_decay_lambda: 0.00413,
temporal_half_life_hours: 168.0,
cooldown_cycles: 3,
temporal_spread_minimum: Duration::from_secs(3600),
max_neighbors_per_node: None,
infomap_trials: None,
infomap_hierarchical: None,
hot_assign_threshold: None,
cold_recluster_ratio: None,
warm_recluster_interval: None,
}
}
}
/// Settings for emotional modulation.
///
/// NOTE(review): the exact effect of each field is implemented elsewhere; the
/// descriptions below restate the field names and should be confirmed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmotionalModulationConfig {
/// Weight of the emotional boost.
pub emotional_boost_weight: f64,
/// Whether emotional content is prioritized.
pub prioritize_emotional: bool,
/// Whether emotion information is included in the prompt.
pub include_emotion_in_prompt: bool,
}
impl Default for EmotionalModulationConfig {
fn default() -> Self {
Self {
emotional_boost_weight: 0.2,
prioritize_emotional: true,
include_emotion_in_prompt: true,
}
}
}
/// Outcome of the gate for a cluster. Three of the four variants carry a
/// free-text reason; `AutoUpdate` carries the action to perform instead.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GateDecision {
/// Proceed with synthesis.
Synthesize { reason: String },
/// Apply an automatic update described by `action`.
AutoUpdate { action: AutoUpdateAction },
/// Defer the cluster (presumably to a later run — TODO confirm).
Defer { reason: String },
/// Skip the cluster.
Skip { reason: String },
}
/// Action attached to `GateDecision::AutoUpdate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AutoUpdateAction {
/// Merge duplicates: `keep` names the retained item, `demote` the others
/// (presumably memory IDs — TODO confirm).
MergeDuplicates { keep: String, demote: Vec<String> },
/// Strengthen links between the listed pairs of identifiers.
StrengthenLinks { pairs: Vec<(String, String)> },
}
/// Thresholds consumed by the gate check.
///
/// NOTE(review): how each threshold maps onto a `GateDecision` is implemented
/// elsewhere; the field descriptions restate the names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateConfig {
/// Minimum cluster size.
pub min_cluster_size: usize,
/// Quality threshold for the gate.
pub gate_quality_threshold: f64,
/// Quality threshold related to deferral.
pub defer_quality_threshold: f64,
/// Similarity threshold related to duplicate detection.
pub duplicate_similarity: f64,
/// Minimum type diversity.
pub min_type_diversity: usize,
/// Cost threshold; unit is not established in this file.
pub cost_threshold: f64,
/// Premium threshold; semantics defined by the consumer.
pub premium_threshold: f64,
}
impl Default for GateConfig {
fn default() -> Self {
Self {
min_cluster_size: 3,
gate_quality_threshold: 0.4,
defer_quality_threshold: 0.6,
duplicate_similarity: 0.92,
min_type_diversity: 2,
cost_threshold: 0.05,
premium_threshold: 0.8,
}
}
}
/// Result of a gate check for one cluster: the decision, the scores recorded
/// with it, and a UTC timestamp.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
/// Identifier of the cluster this result refers to.
pub cluster_id: String,
/// The decision reached.
pub decision: GateDecision,
/// Scores recorded alongside the decision.
pub scores: GateScores,
/// UTC timestamp (presumably when the check ran — TODO confirm).
pub timestamp: DateTime<Utc>,
}
/// Scores recorded for a gate check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateScores {
/// Quality score.
pub quality: f64,
/// Type diversity (presumably the number of distinct member types — TODO confirm).
pub type_diversity: usize,
/// Estimated cost; unit is not established in this file.
pub estimated_cost: f64,
/// Number of members.
pub member_count: usize,
}
/// Bundle of inputs for one synthesis: the cluster, its member records and
/// the synthesis configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisRequest {
/// The cluster to synthesize.
pub cluster: MemoryCluster,
/// Memory records for the request (presumably those named in
/// `cluster.members` — TODO confirm).
pub members: Vec<MemoryRecord>,
/// Configuration for this synthesis.
pub config: SynthesisConfig,
}
/// Settings for a synthesis LLM call; passed to
/// `SynthesisLlmProvider::generate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisConfig {
/// Model name; the `Default` impl leaves it empty.
pub model: String,
/// Maximum number of tokens (presumably for the LLM response — TODO confirm).
pub max_tokens: usize,
/// Sampling temperature.
pub temperature: f64,
/// Which prompt template to use.
pub prompt_template: PromptTemplate,
/// Maximum number of memories per LLM call.
pub max_memories_per_llm_call: usize,
/// Age threshold for re-synthesis; serialized as `f64` seconds via
/// `duration_secs`.
#[serde(with = "duration_secs")]
pub resynthesis_age_threshold: Duration,
}
impl Default for SynthesisConfig {
fn default() -> Self {
Self {
model: String::new(),
max_tokens: 512,
temperature: 0.3,
prompt_template: PromptTemplate::General,
max_memories_per_llm_call: 10,
resynthesis_age_threshold: Duration::from_secs(30 * 24 * 3600),
}
}
}
/// Selects a prompt template. The template texts themselves are not defined
/// in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum PromptTemplate {
/// General-purpose template; used by `SynthesisConfig::default`.
General,
/// Template for factual patterns.
FactualPattern,
/// Template for episodic threads.
EpisodicThread,
/// Template for causal chains.
CausalChain,
}
/// Output of a synthesis: the insight text, its confidence and type, and the
/// references it cites.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisOutput {
/// Text of the insight.
pub insight_text: String,
/// Confidence value; range is not established in this file.
pub confidence: f64,
/// Kind of insight.
pub insight_type: InsightType,
/// Referenced sources (presumably memory IDs — TODO confirm).
pub source_references: Vec<String>,
}
/// Kind of insight carried by a `SynthesisOutput`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum InsightType {
/// A pattern.
Pattern,
/// A rule.
Rule,
/// A connection.
Connection,
/// A contradiction.
Contradiction,
}
/// One provenance entry linking an insight to a single source, together with
/// the cluster, gate and confidence data recorded for that synthesis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProvenanceRecord {
/// Identifier of this record.
pub id: String,
/// Identifier of the insight.
pub insight_id: String,
/// Identifier of the source.
pub source_id: String,
/// Identifier of the cluster.
pub cluster_id: String,
/// UTC timestamp of the synthesis.
pub synthesis_timestamp: DateTime<Utc>,
/// Gate decision stored as a string; the encoding is not defined in this file.
pub gate_decision: String,
/// Gate scores, when available.
pub gate_scores: Option<GateScores>,
/// Confidence value.
pub confidence: f64,
/// The source's original importance, when available (presumably its value
/// before demotion — TODO confirm).
pub source_original_importance: Option<f64>,
}
/// Provenance records grouped into layers under a root identifier; returned
/// by `SynthesisEngine::get_provenance`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProvenanceChain {
/// Identifier of the chain's root.
pub root_id: String,
/// Records grouped by layer (presumably one layer per traversal depth —
/// TODO confirm).
pub layers: Vec<Vec<ProvenanceRecord>>,
}
/// Result of undoing a synthesis; returned by
/// `SynthesisEngine::undo_synthesis`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UndoSynthesis {
/// Identifier of the insight the undo refers to.
pub insight_id: String,
/// One entry per source considered for restoration.
pub restored_sources: Vec<RestoredSource>,
}
/// Per-source entry of an `UndoSynthesis`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RestoredSource {
/// Identifier of the memory.
pub memory_id: String,
/// The memory's original importance.
pub original_importance: f64,
/// Restoration flag (presumably `true` when the restore succeeded —
/// TODO confirm).
pub restored: bool,
}
/// State remembered from a previous run, for incremental processing.
///
/// NOTE(review): presumably kept per cluster and compared against
/// `IncrementalConfig` thresholds — confirm against the code that uses it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IncrementalState {
/// Set of member identifiers captured at the last run.
pub last_member_snapshot: HashSet<String>,
/// Quality score captured at the last run.
pub last_quality_score: f64,
/// UTC time of the last run.
pub last_run: DateTime<Utc>,
/// Number of runs so far.
pub run_count: usize,
}
/// Staleness thresholds for incremental processing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IncrementalConfig {
/// Member-change threshold.
/// NOTE(review): named `_pct`, but the default is 0.5, which suggests a
/// fraction rather than a 0–100 percentage — confirm.
pub staleness_member_change_pct: f64,
/// Quality-delta threshold.
pub staleness_quality_delta: f64,
}
impl Default for IncrementalConfig {
fn default() -> Self {
Self {
staleness_member_change_pct: 0.5,
staleness_quality_delta: 0.2,
}
}
}
/// Summary of a synthesis run; returned by `SynthesisEngine::synthesize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisReport {
/// Number of clusters found.
pub clusters_found: usize,
/// Number of clusters synthesized.
pub clusters_synthesized: usize,
/// Number of clusters auto-updated.
pub clusters_auto_updated: usize,
/// Number of clusters deferred.
pub clusters_deferred: usize,
/// Number of clusters skipped.
pub clusters_skipped: usize,
/// Number of full synthesis runs.
pub synthesis_runs_full: usize,
/// Number of incremental synthesis runs.
pub synthesis_runs_incremental: usize,
/// Created insights (presumably their IDs — TODO confirm).
pub insights_created: Vec<String>,
/// Demoted sources (presumably their IDs — TODO confirm).
pub sources_demoted: Vec<String>,
/// Errors collected during the run.
pub errors: Vec<SynthesisError>,
/// Duration of the run; serialized as `f64` seconds via `duration_secs`.
#[serde(with = "duration_secs")]
pub duration: Duration,
/// Gate results recorded during the run.
pub gate_results: Vec<GateResult>,
}
/// Errors recorded during synthesis; collected in `SynthesisReport::errors`.
///
/// A `Display` impl in this file renders each variant as a one-line message.
/// NOTE(review): no `std::error::Error` impl is visible in this view — check
/// whether one exists elsewhere before relying on `?` into `Box<dyn Error>`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SynthesisError {
/// The LLM timed out for the given cluster.
LlmTimeout { cluster_id: String },
/// The LLM response for the given cluster was invalid; the raw response is
/// retained in the variant.
LlmInvalidResponse {
cluster_id: String,
raw_response: String,
},
/// The output for the given cluster referenced the listed invalid IDs.
HallucinatedReferences {
cluster_id: String,
invalid_ids: Vec<String>,
},
/// Validation failed for the given cluster, with a reason.
ValidationFailed { cluster_id: String, reason: String },
/// A storage error for the given cluster, with a message.
StorageError { cluster_id: String, message: String },
/// An embedding error for the given memory, with a message.
EmbeddingError { memory_id: String, message: String },
/// The LLM budget ran out with `remaining_clusters` clusters left.
BudgetExhausted { remaining_clusters: usize },
/// The given cluster became stale.
ClusterStale { cluster_id: String },
}
impl std::fmt::Display for SynthesisError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::LlmTimeout { cluster_id } => {
write!(f, "LLM timeout for cluster {}", cluster_id)
}
Self::LlmInvalidResponse { cluster_id, .. } => {
write!(f, "LLM invalid response for cluster {}", cluster_id)
}
Self::HallucinatedReferences {
cluster_id,
invalid_ids,
} => {
write!(
f,
"Hallucinated references in cluster {}: {:?}",
cluster_id, invalid_ids
)
}
Self::ValidationFailed { cluster_id, reason } => {
write!(
f,
"Validation failed for cluster {}: {}",
cluster_id, reason
)
}
Self::StorageError {
cluster_id,
message,
} => {
write!(f, "Storage error for cluster {}: {}", cluster_id, message)
}
Self::EmbeddingError {
memory_id,
message,
} => {
write!(f, "Embedding error for memory {}: {}", memory_id, message)
}
Self::BudgetExhausted {
remaining_clusters,
} => {
write!(
f,
"LLM budget exhausted with {} clusters remaining",
remaining_clusters
)
}
Self::ClusterStale { cluster_id } => {
write!(f, "Cluster {} became stale", cluster_id)
}
}
}
}
/// Top-level synthesis settings: an on/off switch, the per-stage configs and
/// a few run-level limits. Passed to `SynthesisEngine::synthesize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisSettings {
/// Master switch; the `Default` impl sets it to `false`.
pub enabled: bool,
/// Cluster-discovery settings.
pub cluster_discovery: ClusterDiscoveryConfig,
/// Gate settings.
pub gate: GateConfig,
/// LLM synthesis settings.
pub synthesis: SynthesisConfig,
/// Emotional-modulation settings.
pub emotional: EmotionalModulationConfig,
/// Incremental-processing settings.
pub incremental: IncrementalConfig,
/// Demotion factor (presumably multiplied into a source's importance —
/// TODO confirm).
pub demotion_factor: f64,
/// Maximum number of insights per consolidation.
pub max_insights_per_consolidation: usize,
/// Maximum number of LLM calls per run.
pub max_llm_calls_per_run: u32,
}
impl Default for SynthesisSettings {
fn default() -> Self {
Self {
enabled: false,
cluster_discovery: ClusterDiscoveryConfig::default(),
gate: GateConfig::default(),
synthesis: SynthesisConfig::default(),
emotional: EmotionalModulationConfig::default(),
incremental: IncrementalConfig::default(),
demotion_factor: 0.5,
max_insights_per_consolidation: 5,
max_llm_calls_per_run: 5,
}
}
}
/// Abstraction over an LLM backend used for synthesis.
///
/// Implementors must be `Send + Sync`.
pub trait SynthesisLlmProvider: Send + Sync {
/// Generates text for `prompt` using the given synthesis `config`.
///
/// # Errors
///
/// Returns a boxed error on failure. The error type is
/// `Box<dyn std::error::Error>` without `Send + Sync` bounds, so the error
/// value itself cannot be moved across threads.
fn generate(
&self,
prompt: &str,
config: &SynthesisConfig,
) -> Result<String, Box<dyn std::error::Error>>;
}
/// Interface of the synthesis engine: run a synthesis pass, discover
/// clusters, evaluate the gate, undo a synthesis and look up provenance.
///
/// Implementors must be `Send + Sync`. The fallible methods return
/// `Box<dyn std::error::Error>` (no `Send + Sync` bounds on the error).
pub trait SynthesisEngine: Send + Sync {
/// Runs synthesis against `storage` with the given `settings` and returns a
/// report. Takes `storage` mutably.
///
/// # Errors
///
/// Returns a boxed error on failure.
fn synthesize(
&self,
storage: &mut Storage,
settings: &SynthesisSettings,
) -> Result<SynthesisReport, Box<dyn std::error::Error>>;
/// Discovers clusters in `storage` according to `config`. Takes `storage`
/// by shared reference.
///
/// # Errors
///
/// Returns a boxed error on failure.
fn discover_clusters(
&self,
storage: &Storage,
config: &ClusterDiscoveryConfig,
) -> Result<Vec<MemoryCluster>, Box<dyn std::error::Error>>;
/// Evaluates the gate for `cluster` and its `members` under `config`.
/// The signature is infallible: it always yields a `GateResult`.
fn check_gate(
&self,
cluster: &MemoryCluster,
members: &[MemoryRecord],
config: &GateConfig,
) -> GateResult;
/// Undoes the synthesis identified by `insight_id`. Takes `storage` mutably.
///
/// # Errors
///
/// Returns a boxed error on failure.
fn undo_synthesis(
&self,
storage: &mut Storage,
insight_id: &str,
) -> Result<UndoSynthesis, Box<dyn std::error::Error>>;
/// Retrieves the provenance chain for `memory_id`, bounded by `max_depth`
/// (presumably the maximum number of layers traversed — TODO confirm).
///
/// # Errors
///
/// Returns a boxed error on failure.
fn get_provenance(
&self,
storage: &Storage,
memory_id: &str,
max_depth: usize,
) -> Result<ProvenanceChain, Box<dyn std::error::Error>>;
}