hirn-engine 0.1.0

//! Three-phase consolidation cycle: REPLAY → DREAM → VALIDATE.
//!
//! Inspired by neuroscience: NREM sleep replays experiences into semantic
//! knowledge, REM sleep generates novel hypotheses by connecting distant
//! memories, and a validation phase scores hypotheses against evidence.

use std::sync::Arc;
use std::time::Instant;

use hirn_core::HirnResult;
use hirn_core::embed::{ChatMessage, LlmOptions, LlmProvider};
use hirn_core::id::MemoryId;
use hirn_core::metadata::Metadata;
use hirn_core::semantic::SemanticRecord;
use hirn_core::timestamp::Timestamp;
use hirn_core::types::{AgentId, EdgeRelation, KnowledgeType, Origin};

use super::ConsolidationConfig;
use crate::HirnDB;
use crate::db::SemanticFilter;

// ═══════════════════════════════════════════════════════════════════════════
// Configuration
// ═══════════════════════════════════════════════════════════════════════════

/// Configuration for the three-phase dream cycle.
///
/// The three `*_enabled` flags gate the corresponding phases of
/// `execute_dream_cycle`. The remaining fields tune the DREAM and VALIDATE
/// phases, and `consolidation_config` is forwarded to the REPLAY pipeline.
#[derive(Debug, Clone)]
pub struct DreamCycleConfig {
    /// Enable the REPLAY phase (existing consolidation pipeline). Default: true.
    pub replay_enabled: bool,
    /// Enable the DREAM phase (LLM-based hypothesis generation). Default: true.
    pub dream_enabled: bool,
    /// Enable the VALIDATE phase (evidence scoring). Default: true.
    ///
    /// VALIDATE only runs when the DREAM phase of the same cycle produced at
    /// least one hypothesis.
    pub validate_enabled: bool,
    /// Maximum number of memory pairs submitted to the LLM per cycle. Default: 10.
    ///
    /// Pairs for which the LLM reports no connection still count toward this
    /// limit, so fewer hypotheses than this may be generated.
    pub dream_batch_size: usize,
    /// Minimum cosine **distance** (= 1 − similarity) required for dream hypothesis
    /// generation. Only memory pairs at least this far apart are considered for
    /// novel association synthesis. Default: 0.7 (= similarity ≤ 0.3).
    ///
    /// Lower values widen the pair pool; higher values restrict to only very
    /// dissimilar memories. Must be in `[0.0, 1.0]`; the value is not validated
    /// in this module.
    pub dream_min_distance: f32,
    /// Minimum confidence for promoting a hypothesis. Default: 0.5.
    ///
    /// VALIDATE computes the confidence as `0.3 + 0.5 * n / (n + 3)` for `n`
    /// supporting episodes, so the value starts at 0.3 and approaches 0.8.
    /// Hypotheses below the threshold are purged.
    pub validation_confidence_threshold: f32,
    /// Base size of the evidence search. Default: 20.
    ///
    /// VALIDATE loads up to 10× this many episodes and scans up to 5× this
    /// many of them per hypothesis.
    pub validation_evidence_limit: usize,
    /// Base consolidation config (for REPLAY phase).
    ///
    /// Its `llm_timeout` is also passed to the DREAM-phase LLM calls.
    pub consolidation_config: ConsolidationConfig,
}

impl Default for DreamCycleConfig {
    /// Build the default configuration: all three phases enabled, a batch of
    /// 10 pairs, a minimum cosine distance of 0.7, a promotion threshold of
    /// 0.5, an evidence limit of 20, and the default `ConsolidationConfig`.
    fn default() -> Self {
        // REPLAY reuses the stock consolidation settings.
        let consolidation_config = ConsolidationConfig::default();

        Self {
            // Phase switches.
            replay_enabled: true,
            dream_enabled: true,
            validate_enabled: true,
            // DREAM tuning.
            dream_batch_size: 10,
            dream_min_distance: 0.7,
            // VALIDATE tuning.
            validation_confidence_threshold: 0.5,
            validation_evidence_limit: 20,
            consolidation_config,
        }
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Phase results
// ═══════════════════════════════════════════════════════════════════════════

/// Result from a single phase of the dream cycle.
///
/// One entry is pushed per phase that actually ran; disabled or skipped
/// phases produce no entry.
#[derive(Debug, Clone)]
pub struct PhaseResult {
    /// Name of the phase.
    pub phase: DreamPhase,
    /// Records created in this phase: extracted concepts for REPLAY, stored
    /// hypotheses for DREAM, promoted hypotheses for VALIDATE.
    pub records_created: usize,
    /// Source records that contributed to this phase: processed records for
    /// REPLAY, two per hypothesis for DREAM, one per hypothesis for VALIDATE.
    pub source_count: usize,
    /// Execution time in milliseconds.
    pub execution_time_ms: f64,
}

/// Identifies one of the three phases of the dream cycle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DreamPhase {
    /// Phase 1: the existing consolidation pipeline.
    Replay,
    /// Phase 2: LLM-based hypothesis generation.
    Dream,
    /// Phase 3: evidence scoring of generated hypotheses.
    Validate,
}

impl std::fmt::Display for DreamPhase {
    /// Write the phase as its upper-case label (`REPLAY`, `DREAM`, `VALIDATE`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            DreamPhase::Replay => "REPLAY",
            DreamPhase::Dream => "DREAM",
            DreamPhase::Validate => "VALIDATE",
        };
        f.write_str(label)
    }
}

/// Result from running the full dream cycle.
#[derive(Debug, Clone)]
pub struct DreamCycleResult {
    /// Unique batch identifier for this cycle, formatted as
    /// `dream-<current timestamp in milliseconds>`.
    pub batch_id: String,
    /// Results from each phase that ran, in execution order.
    pub phase_results: Vec<PhaseResult>,
    /// Number of hypotheses generated in DREAM phase.
    pub hypotheses_generated: usize,
    /// Number of hypotheses promoted in VALIDATE phase (0 if it did not run).
    pub hypotheses_promoted: usize,
    /// Number of hypotheses discarded in VALIDATE phase (0 if it did not run).
    pub hypotheses_discarded: usize,
    /// Total execution time in milliseconds.
    pub execution_time_ms: f64,
}

/// A candidate hypothesis generated during the DREAM phase.
///
/// The hypothesis itself has already been stored as a semantic record when
/// this value is created; this struct carries what VALIDATE needs to score it.
#[derive(Debug, Clone)]
pub struct DreamHypothesis {
    /// Id of the semantic record that was stored for this hypothesis.
    pub id: MemoryId,
    /// The connection text generated by the LLM (trimmed).
    pub connection: String,
    /// Id of source memory A.
    pub source_a: MemoryId,
    /// Id of source memory B.
    pub source_b: MemoryId,
}

// ═══════════════════════════════════════════════════════════════════════════
// Dream Cycle execution
// ═══════════════════════════════════════════════════════════════════════════

/// Run the dream cycle: REPLAY → DREAM → VALIDATE, each gated by its
/// `*_enabled` flag in `config`.
///
/// * REPLAY runs the consolidation pipeline with `config.consolidation_config`.
/// * DREAM generates hypotheses and stores them as semantic records.
/// * VALIDATE runs only when DREAM produced at least one hypothesis; it
///   promotes or discards each one.
///
/// The returned [`DreamCycleResult`] holds one [`PhaseResult`] per phase that
/// ran, plus hypothesis counters and the total wall-clock time.
///
/// # Errors
///
/// The first error from any phase is returned immediately and later phases
/// are skipped. This function performs no rollback: hypotheses already stored
/// by DREAM stay in the store if VALIDATE fails or is disabled.
pub async fn execute_dream_cycle(
    db: &HirnDB,
    llm: Arc<dyn LlmProvider>,
    config: &DreamCycleConfig,
) -> HirnResult<DreamCycleResult> {
    let cycle_started = Instant::now();
    let batch_id = format!("dream-{}", Timestamp::now().millis());
    // Wall-clock milliseconds elapsed since `since`.
    let millis_since = |since: Instant| since.elapsed().as_secs_f64() * 1000.0;
    let mut phase_results = Vec::new();

    // ── Phase 1: REPLAY ──────────────────────────────────────────────
    if config.replay_enabled {
        let replay_started = Instant::now();
        let outcome = super::pipeline::execute_consolidation_pipeline(
            db,
            &config.consolidation_config,
            &[],
            Some(&llm),
        )
        .await?;

        phase_results.push(PhaseResult {
            phase: DreamPhase::Replay,
            records_created: outcome.concepts_extracted,
            source_count: outcome.records_processed,
            execution_time_ms: millis_since(replay_started),
        });
    }

    // ── Phase 2: DREAM ───────────────────────────────────────────────
    let hypotheses: Vec<DreamHypothesis> = if config.dream_enabled {
        let dream_started = Instant::now();
        let generated = execute_dream_phase(db, &llm, config, &batch_id).await?;

        phase_results.push(PhaseResult {
            phase: DreamPhase::Dream,
            records_created: generated.len(),
            // Each hypothesis is derived from exactly two source memories.
            source_count: generated.len() * 2,
            execution_time_ms: millis_since(dream_started),
        });
        generated
    } else {
        Vec::new()
    };
    let hypotheses_generated = hypotheses.len();

    // ── Phase 3: VALIDATE ────────────────────────────────────────────
    let (hypotheses_promoted, hypotheses_discarded) =
        if config.validate_enabled && !hypotheses.is_empty() {
            let validate_started = Instant::now();
            let (promoted, discarded) =
                execute_validate_phase(db, &hypotheses, config, &batch_id).await?;

            phase_results.push(PhaseResult {
                phase: DreamPhase::Validate,
                records_created: promoted,
                source_count: hypotheses.len(),
                execution_time_ms: millis_since(validate_started),
            });
            (promoted, discarded)
        } else {
            // Nothing to validate: both counters stay at zero.
            (0, 0)
        };

    Ok(DreamCycleResult {
        batch_id,
        phase_results,
        hypotheses_generated,
        hypotheses_promoted,
        hypotheses_discarded,
        execution_time_ms: millis_since(cycle_started),
    })
}

// ═══════════════════════════════════════════════════════════════════════════
// Phase 2: DREAM — novel hypothesis generation
// ═══════════════════════════════════════════════════════════════════════════

/// Select pairs of semantically distant memories, then ask an LLM to
/// hypothesize connections between them.
///
/// Steps:
/// 1. Load the semantic records matching the default `SemanticFilter`.
/// 2. Rank candidate pairs with `find_distant_pairs` (no shared-entity check
///    is performed; the LLM decides whether a connection exists).
/// 3. For the first `config.dream_batch_size` pairs, query the LLM. Empty or
///    "no connection" replies are skipped.
/// 4. Store each accepted reply as an `Inferred` semantic record with a
///    confidence of 0.3 and link it to both sources with `DerivedFrom` edges.
///
/// Embedding the hypothesis text and creating the edges are best-effort:
/// failures there are ignored and the hypothesis is still returned.
///
/// `_batch_id` is currently unused.
///
/// # Errors
///
/// Returns the first error from listing semantics, the LLM call, building the
/// record, or storing it. Hypotheses stored before the failing pair remain in
/// the store but are not returned to the caller.
async fn execute_dream_phase(
    db: &HirnDB,
    llm: &Arc<dyn LlmProvider>,
    config: &DreamCycleConfig,
    _batch_id: &str,
) -> HirnResult<Vec<DreamHypothesis>> {
    // Invariant: the literal is a fixed identifier, so failure here is a bug.
    let agent = AgentId::new("dream_replay").expect("\"dream_replay\" is a valid agent id");

    // Load all semantic records with embeddings.
    let filter = SemanticFilter::default();
    let semantics = db.list_semantics(&filter).await?;

    // Candidate pairs, least similar first.
    let pairs = find_distant_pairs(&semantics, config);

    // The LLM options are identical for every pair, so build them once.
    let llm_options = LlmOptions {
        temperature: 0.7,
        max_tokens: 300,
        ..Default::default()
    };

    let mut hypotheses = Vec::new();

    for (a, b) in pairs.into_iter().take(config.dream_batch_size) {
        // Ask LLM: "What connects these seemingly unrelated observations?"
        let prompt = build_dream_prompt(&a, &b);
        let response = super::generate_text_with_timeout(
            llm.as_ref(),
            &prompt,
            &llm_options,
            config.consolidation_config.llm_timeout,
        )
        .await?;
        let connection = response.trim().to_string();

        if is_rejected_connection(&connection) {
            continue;
        }

        // Create a candidate semantic record tagged as a hypothesis.
        let concept_name = format!(
            "hypothesis: {} ↔ {}",
            truncate(&a.concept, 30),
            truncate(&b.concept, 30),
        );

        let mut builder = SemanticRecord::builder()
            .concept(&concept_name)
            .knowledge_type(KnowledgeType::Inferred)
            .description(&connection)
            .confidence(0.3) // low initial confidence — needs validation
            .agent_id(agent.clone())
            .origin(Origin::DreamReplay)
            .source_episode(a.id)
            .source_episode(b.id);

        // Best-effort: a hypothesis without an embedding is still stored.
        if let Ok(emb) = db.embed_text(&connection).await {
            builder = builder.embedding(emb);
        }

        let record = builder.build()?;
        let hyp_id = record.id;
        db.store_semantic(record).await?;

        // Best-effort derived_from edges: a failed edge must not drop the
        // hypothesis, so the results are deliberately ignored.
        let _ = db
            .connect_with(
                hyp_id,
                a.id,
                EdgeRelation::DerivedFrom,
                0.5,
                Metadata::default(),
            )
            .await;
        let _ = db
            .connect_with(
                hyp_id,
                b.id,
                EdgeRelation::DerivedFrom,
                0.5,
                Metadata::default(),
            )
            .await;

        hypotheses.push(DreamHypothesis {
            id: hyp_id,
            connection,
            source_a: a.id,
            source_b: b.id,
        });
    }

    Ok(hypotheses)
}

/// Returns `true` when an LLM reply must not be stored as a hypothesis: it is
/// empty, or it contains one of the "no connection" phrases (case-insensitive).
fn is_rejected_connection(reply: &str) -> bool {
    if reply.is_empty() {
        return true;
    }
    // Lower-case once and reuse it for both phrase checks.
    let lowered = reply.to_lowercase();
    lowered.contains("no clear connection") || lowered.contains("no obvious connection")
}

/// Find pairs of semantic records whose embeddings are far apart.
///
/// Every unordered pair `(semantics[i], semantics[j])` with `i < j` where both
/// records have an embedding is scored with `cosine_similarity`. A pair is
/// kept when its cosine distance `1.0 - similarity` is at least
/// `config.dream_min_distance`. No shared-entity check is performed; the LLM
/// decides later whether a connection exists.
///
/// Returns clones of the kept pairs sorted by ascending similarity (least
/// similar first). Pairs with equal similarity keep their discovery order.
/// The result is not truncated; callers apply their own batch limit.
///
/// NOTE(review): `cosine_similarity` returns 0.0 for mismatched lengths or
/// zero vectors, so such pairs have distance 1.0 and are accepted as
/// "distant" — confirm this is intended.
pub(crate) fn find_distant_pairs(
    semantics: &[SemanticRecord],
    config: &DreamCycleConfig,
) -> Vec<(SemanticRecord, SemanticRecord)> {
    // (similarity, index of a, index of b). Scoring once here means the sort
    // below neither recomputes similarities nor moves full records around.
    let mut candidates: Vec<(f32, usize, usize)> = Vec::new();

    for (i, a) in semantics.iter().enumerate() {
        // Both need embeddings.
        let emb_a = match a.embedding.as_ref() {
            Some(emb) => emb,
            None => continue,
        };

        for (offset, b) in semantics[i + 1..].iter().enumerate() {
            let emb_b = match b.embedding.as_ref() {
                Some(emb) => emb,
                None => continue,
            };

            let similarity = cosine_similarity(emb_a, emb_b);

            // We want distant pairs: distance = 1.0 - similarity must be at
            // least dream_min_distance. A NaN similarity fails this test.
            if (1.0 - similarity) >= config.dream_min_distance {
                candidates.push((similarity, i, i + 1 + offset));
            }
        }
    }

    // Least similar first — most interesting. The sort is stable, and kept
    // similarities are never NaN, so the `Equal` fallback is only defensive.
    candidates.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap_or(std::cmp::Ordering::Equal));

    candidates
        .into_iter()
        .map(|(_, i, j)| (semantics[i].clone(), semantics[j].clone()))
        .collect()
}

/// Assemble the two-message chat prompt used for dream hypothesis generation.
///
/// The first message is a fixed system instruction telling the model to
/// answer with a single paragraph, or with exactly "no clear connection".
/// The second is a user message holding both fragments: each description is
/// passed through `sanitize_for_llm` and labelled with its knowledge type.
pub(crate) fn build_dream_prompt(a: &SemanticRecord, b: &SemanticRecord) -> Vec<ChatMessage> {
    // Sanitize the stored descriptions before embedding them in the prompt.
    let desc_a = hirn_core::sanitize::sanitize_for_llm(&a.description);
    let desc_b = hirn_core::sanitize::sanitize_for_llm(&b.description);
    let type_a = knowledge_type_str(a.knowledge_type);
    let type_b = knowledge_type_str(b.knowledge_type);

    let instructions =
        "You are a cognitive association engine. Given two seemingly unrelated knowledge \
         fragments from a memory database, hypothesize a meaningful connection between them. \
         Your response should be a single paragraph describing the hypothetical connection. \
         If there is truly no plausible connection, respond with exactly: \
         \"no clear connection\".";

    let question = format!(
        "Fragment A ({type_a}): {desc_a}\n\n\
         Fragment B ({type_b}): {desc_b}\n\n\
         What connects these seemingly unrelated observations?"
    );

    vec![
        ChatMessage {
            role: "system".to_string(),
            content: instructions.to_string(),
        },
        ChatMessage {
            role: "user".to_string(),
            content: question,
        },
    ]
}

/// Map a [`KnowledgeType`] to the short human-readable label used inside the
/// dream prompt (e.g. `Propositional` → "fact").
///
/// The match has no catch-all arm, so adding a variant to `KnowledgeType`
/// forces this table to be updated.
fn knowledge_type_str(kt: KnowledgeType) -> &'static str {
    match kt {
        KnowledgeType::Propositional => "fact",
        KnowledgeType::Prescriptive => "rule",
        KnowledgeType::Taxonomic => "taxonomy",
        KnowledgeType::Inferred => "hypothesis",
        KnowledgeType::Community => "community summary",
        KnowledgeType::RaptorSummary => "hierarchical summary",
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Phase 3: VALIDATE — evidence scoring
// ═══════════════════════════════════════════════════════════════════════════

/// For each hypothesis, search for supporting evidence in episodic memory.
/// Promote if enough evidence; discard otherwise.
///
/// Returns (promoted_count, discarded_count).
/// Score every hypothesis against episodic evidence, then promote or purge it.
///
/// Loads up to `10 × config.validation_evidence_limit` non-archived episodes
/// once. For each hypothesis it counts supporting episodes and turns the
/// count into a confidence with `evidence_weighted_confidence`:
///
/// * confidence ≥ `config.validation_confidence_threshold` — the record is
///   corrected with the new confidence, the evidence count, and a reason
///   naming `batch_id`;
/// * otherwise — the record is purged from the semantic store.
///
/// Returns `(promoted_count, discarded_count)`.
///
/// # Errors
///
/// Returns the first error from listing episodes, correcting a record, or
/// purging a record. Hypotheses handled before the failure keep their new
/// state; the rest are left untouched.
async fn execute_validate_phase(
    db: &HirnDB,
    hypotheses: &[DreamHypothesis],
    config: &DreamCycleConfig,
    batch_id: &str,
) -> HirnResult<(usize, usize)> {
    let mut promoted = 0;
    let mut discarded = 0;

    let filter = crate::db::EpisodicFilter {
        include_archived: false,
        // Load a larger pool than the per-hypothesis scan uses. Saturate so
        // that a huge configured limit cannot overflow.
        limit: Some(config.validation_evidence_limit.saturating_mul(10)),
        ..Default::default()
    };
    let episodes = db.list_episodes(&filter).await?;

    for hyp in hypotheses {
        // Search for supporting episodes by embedding similarity.
        let evidence_count = count_supporting_evidence(db, hyp, &episodes, config).await;
        let new_confidence = evidence_weighted_confidence(evidence_count);

        if new_confidence >= config.validation_confidence_threshold {
            // Promote: record the boosted confidence and the evidence count.
            let update = crate::db::SemanticUpdate {
                confidence: Some(new_confidence),
                evidence_count: Some(evidence_count),
                reason: Some(format!("validated by dream cycle {batch_id}")),
                ..crate::db::SemanticUpdate::with_metadata(
                    AgentId::well_known("dream_validation"),
                    hyp.id,
                )
            };
            db.correct_semantic(hyp.id, update).await?;

            promoted += 1;
        } else {
            // Discard: delete the hypothesis record.
            db.purge_semantic(hyp.id).await?;

            discarded += 1;
        }
    }

    Ok((promoted, discarded))
}

/// Evidence-weighted confidence: `0.3 + 0.5 * n / (n + 3)`.
///
/// Yields 0.3 with no evidence, 0.55 at three supporting episodes, and
/// approaches 0.8 as the count grows.
fn evidence_weighted_confidence(evidence_count: u32) -> f32 {
    /// Confidence of a hypothesis with no supporting evidence.
    const BASE_CONFIDENCE: f32 = 0.3;
    /// Upper bound of the boost that evidence can add.
    const MAX_BOOST: f32 = 0.5;
    /// Evidence count at which half of `MAX_BOOST` is reached.
    const HALF_SATURATION: f32 = 3.0;

    let evidence = evidence_count as f32;
    let evidence_boost = (evidence / (evidence + HALF_SATURATION)) * MAX_BOOST;
    BASE_CONFIDENCE + evidence_boost
}

/// Count episodic records that support a hypothesis by embedding similarity.
///
/// The hypothesis connection text is embedded with `db.embed_text`. At most
/// `5 × config.validation_evidence_limit` episodes from the front of
/// `episodes` are scanned. An episode counts as support when it has an
/// embedding whose cosine similarity to the hypothesis is strictly greater
/// than 0.4.
///
/// Returns 0 when the hypothesis text cannot be embedded, so an embedding
/// failure is indistinguishable from "no evidence" for the caller.
async fn count_supporting_evidence(
    db: &HirnDB,
    hypothesis: &DreamHypothesis,
    episodes: &[hirn_core::episodic::EpisodicRecord],
    config: &DreamCycleConfig,
) -> u32 {
    /// An episode "supports" if it's reasonably related (similarity above this).
    const SUPPORT_SIMILARITY_THRESHOLD: f32 = 0.4;

    // Embed the hypothesis connection text.
    let hyp_embedding = match db.embed_text(&hypothesis.connection).await {
        Ok(emb) => emb,
        Err(_) => return 0,
    };

    // Saturate so that a huge configured limit cannot overflow.
    let scan_limit = config.validation_evidence_limit.saturating_mul(5);

    let mut count = 0u32;
    let embedded_episodes = episodes
        .iter()
        .take(scan_limit)
        .filter_map(|ep| ep.embedding.as_ref());
    for ep_emb in embedded_episodes {
        if cosine_similarity(&hyp_embedding, ep_emb) > SUPPORT_SIMILARITY_THRESHOLD {
            count += 1;
        }
    }

    count
}

// ═══════════════════════════════════════════════════════════════════════════
// Utilities
// ═══════════════════════════════════════════════════════════════════════════

/// Cosine similarity of two vectors: `dot(a, b) / (‖a‖ · ‖b‖)`.
///
/// Returns 0.0 instead of failing when the inputs are empty, differ in
/// length, or either one has a zero Euclidean norm.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Euclidean (L2) norm of a vector.
    let l2_norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();

    // Guard: the similarity is undefined for empty or mismatched inputs.
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let (norm_a, norm_b) = (l2_norm(a), l2_norm(b));
    // Guard: avoid dividing by zero for zero vectors.
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum::<f32>();
    dot / (norm_a * norm_b)
}

/// Return the prefix of `s` holding at most `max` characters.
///
/// The cut is made on a `char` boundary, so multi-byte UTF-8 input never
/// panics. When `s` has `max` characters or fewer it is returned unchanged.
fn truncate(s: &str, max: usize) -> &str {
    // The byte offset of the `max`-th char (0-based) is where the prefix ends.
    s.char_indices()
        .nth(max)
        .map_or(s, |(byte_end, _)| &s[..byte_end])
}

// ═══════════════════════════════════════════════════════════════════════════
// Tests
// ═══════════════════════════════════════════════════════════════════════════

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // ── Mock LLM ─────────────────────────────────────────────────────

    /// Scripted `LlmProvider` for tests: replies are served from a fixed list,
    /// one per call, without contacting any real model.
    struct MockDreamLlm {
        /// Replies returned in order, one per `generate_text` call.
        responses: Vec<String>,
        /// Number of `generate_text` calls made so far; indexes `responses`.
        call_idx: AtomicUsize,
    }

    impl MockDreamLlm {
        /// Build a mock that returns `responses` in order.
        fn new(responses: Vec<&str>) -> Self {
            Self {
                responses: responses.into_iter().map(String::from).collect(),
                call_idx: AtomicUsize::new(0),
            }
        }

        /// Build a mock that returns `response` for its first 100 calls.
        fn constant(response: &str) -> Self {
            Self::new(vec![response; 100])
        }
    }

    #[async_trait::async_trait]
    impl LlmProvider for MockDreamLlm {
        /// Return the next scripted reply, ignoring the prompt and options.
        /// Once the script is exhausted, every call yields
        /// "no clear connection".
        async fn generate_text(
            &self,
            _messages: &[ChatMessage],
            _options: &LlmOptions,
        ) -> hirn_core::HirnResult<String> {
            // Claim the next slot atomically so concurrent calls get distinct replies.
            let idx = self.call_idx.fetch_add(1, Ordering::Relaxed);
            let text = self
                .responses
                .get(idx)
                .cloned()
                .unwrap_or_else(|| "no clear connection".to_string());
            Ok(text)
        }

        /// Fixed model identifier for the mock.
        fn model_id(&self) -> &str {
            "mock-dream"
        }
    }

    // ── Helper ───────────────────────────────────────────────────────

    /// Open a fresh `HirnDB` under a new temporary directory, configured for
    /// 3-dimensional embeddings so tests can use hand-written vectors.
    ///
    /// Panics if the temporary directory or either store cannot be opened.
    async fn test_db() -> HirnDB {
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("test");
        let lance_path = dir.path().join("lance");
        let mut config = hirn_core::HirnConfig::default();
        config.db_path = db_path;
        config.embedding_dimensions = hirn_core::EmbeddingDimension::new_const(3);
        let storage: std::sync::Arc<dyn hirn_storage::PhysicalStore> = hirn_storage::HirnDb::open(
            hirn_storage::HirnDbConfig::local(lance_path.to_str().unwrap()),
        )
        .await
        .unwrap()
        .store_arc();
        let db = HirnDB::open_with_config(config, storage).await.unwrap();
        // Deliberately leak the TempDir guard so its destructor never runs —
        // presumably to keep the directory on disk while the returned database
        // is in use. NOTE(review): the directory is never cleaned up.
        std::mem::forget(dir);
        db
    }

    /// Agent id ("test") used as the author of every fixture record.
    fn agent() -> AgentId {
        AgentId::new("test").unwrap()
    }

    // ── Tests ────────────────────────────────────────────────────────

    /// With two orthogonal records and an LLM that always proposes a
    /// connection, DREAM yields exactly one hypothesis and VALIDATE either
    /// promotes or discards it.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_dream_cycle_all_phases() {
        let db = test_db().await;

        // Store two distant semantic records.
        let rec_a = SemanticRecord::builder()
            .concept("JWT authentication tokens")
            .knowledge_type(KnowledgeType::Propositional)
            .description("JWT tokens are used for stateless authentication in web APIs")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        let _id_a = db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("cache TTL expiry")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Cache entries expire after a configurable TTL period to ensure freshness")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        let _id_b = db.store_semantic(rec_b).await.unwrap();

        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant(
            "Both JWT tokens and cache entries share an expiration-based lifecycle \
             pattern. JWT token expiry is essentially a security-focused TTL, while \
             cache TTL manages data freshness. This suggests a common pattern of \
             time-bounded validity that could be abstracted.",
        ));

        let config = DreamCycleConfig {
            replay_enabled: false, // skip REPLAY (no episodes)
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            validation_confidence_threshold: 0.3,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        // Two records form exactly one pair, hence one hypothesis.
        assert_eq!(result.hypotheses_generated, 1);
        assert!(result.phase_results.len() >= 2); // DREAM + VALIDATE
        // The single hypothesis must be accounted for exactly once.
        assert!(result.hypotheses_promoted + result.hypotheses_discarded == 1);
    }

    /// With DREAM and VALIDATE disabled, only the REPLAY phase is reported
    /// and no hypotheses are generated.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_dream_disabled_only_replay() {
        let db = test_db().await;
        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant("unused"));

        let config = DreamCycleConfig {
            replay_enabled: true,
            dream_enabled: false,
            validate_enabled: false,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        // Only REPLAY ran.
        assert_eq!(result.phase_results.len(), 1);
        assert_eq!(result.phase_results[0].phase, DreamPhase::Replay);
        assert_eq!(result.hypotheses_generated, 0);
    }

    /// A promotion threshold of 0.99 is above anything VALIDATE can compute,
    /// so the single generated hypothesis is discarded.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_validate_rejects_all_hypotheses() {
        let db = test_db().await;

        // Create two distant semantic records.
        let rec_a = SemanticRecord::builder()
            .concept("quantum entanglement")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Quantum particles can be entangled regardless of distance")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("bread baking temperature")
            .knowledge_type(KnowledgeType::Prescriptive)
            .description("Bread should be baked at 220 degrees Celsius for a crisp crust")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_b).await.unwrap();

        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant(
            "Temperature control is important in both quantum physics and baking.",
        ));

        let config = DreamCycleConfig {
            replay_enabled: false,
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            // High threshold — nothing gets promoted.
            validation_confidence_threshold: 0.99,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        assert_eq!(result.hypotheses_generated, 1);
        assert_eq!(result.hypotheses_promoted, 0);
        assert_eq!(result.hypotheses_discarded, 1);
    }

    /// The cycle result carries a non-empty batch id and one `PhaseResult`
    /// each for DREAM and VALIDATE when both phases run.
    ///
    /// Despite the name, this test inspects `phase_results` only; it does not
    /// read any event log.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_events_logged_per_phase() {
        let db = test_db().await;

        let rec_a = SemanticRecord::builder()
            .concept("microservices architecture")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Services communicate via REST APIs and message queues")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("neural network layers")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Deep learning models stack multiple transformation layers")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_b).await.unwrap();

        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant(
            "Both involve layered architectures where each layer transforms data for the next.",
        ));

        let config = DreamCycleConfig {
            replay_enabled: false,
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            validation_confidence_threshold: 0.3,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        assert!(!result.batch_id.is_empty());
        // DREAM and VALIDATE phases should have run.
        let phases: Vec<DreamPhase> = result.phase_results.iter().map(|p| p.phase).collect();
        assert!(phases.contains(&DreamPhase::Dream));
        assert!(phases.contains(&DreamPhase::Validate));
    }

    /// When the LLM answers "no clear connection", the pair is skipped and no
    /// hypothesis is generated.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_no_clear_connection_skipped() {
        let db = test_db().await;

        let rec_a = SemanticRecord::builder()
            .concept("photosynthesis")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Plants convert sunlight into chemical energy")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("compiler optimization")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Compilers apply transformations to generate faster machine code")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_b).await.unwrap();

        // LLM says no connection.
        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant("no clear connection"));

        let config = DreamCycleConfig {
            replay_enabled: false,
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        assert_eq!(result.hypotheses_generated, 0);
    }

    /// A promoted hypothesis is stored as an `Inferred` record whose concept
    /// starts with "hypothesis:", whose origin is `DreamReplay`, and which
    /// lists both source memories.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_hypothesis_has_correct_metadata() {
        let db = test_db().await;

        let rec_a = SemanticRecord::builder()
            .concept("event sourcing")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Persist state changes as a sequence of events")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        let _id_a = db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("git version control")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Track code changes as commits in a directed acyclic graph")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        let _id_b = db.store_semantic(rec_b).await.unwrap();

        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant(
            "Both event sourcing and git are append-only log systems that enable time travel.",
        ));

        // A threshold of 0.3 equals the base confidence, so the hypothesis is
        // promoted (kept in the store) even with no supporting evidence.
        let config = DreamCycleConfig {
            replay_enabled: false,
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            validation_confidence_threshold: 0.3,
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        assert_eq!(result.hypotheses_generated, 1);

        // Check that the hypothesis record has correct metadata.
        let semantics = db.list_semantics(&SemanticFilter::default()).await.unwrap();
        let hypothesis = semantics
            .iter()
            .find(|s| s.knowledge_type == KnowledgeType::Inferred)
            .expect("should have an inferred record");

        assert!(hypothesis.concept.starts_with("hypothesis:"));
        assert!(*hypothesis.provenance.origin() == Origin::DreamReplay);
        // Source episodes should contain the two source memory IDs.
        assert_eq!(hypothesis.source_episodes.len(), 2);
    }

    /// A hypothesis rejected by VALIDATE is purged: afterwards the semantic
    /// store contains no `Inferred` records.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_validate_cycle_failure_no_promotion() {
        // If VALIDATE rejects a hypothesis (here: an unreachable threshold),
        // it is not promoted. The cycle itself still returns `Ok`.
        let db = test_db().await;

        let rec_a = SemanticRecord::builder()
            .concept("distributed consensus")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Raft and Paxos achieve consensus across distributed nodes")
            .embedding(vec![1.0, 0.0, 0.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_a).await.unwrap();

        let rec_b = SemanticRecord::builder()
            .concept("recipe ingredient substitution")
            .knowledge_type(KnowledgeType::Prescriptive)
            .description("Butter can be substituted with margarine in most baking recipes")
            .embedding(vec![0.0, 0.0, 1.0])
            .confidence(0.8)
            .agent_id(agent())
            .origin(Origin::Consolidation)
            .build()
            .unwrap();
        db.store_semantic(rec_b).await.unwrap();

        let llm: Arc<dyn LlmProvider> = Arc::new(MockDreamLlm::constant(
            "Both involve choosing between alternatives to achieve a goal.",
        ));

        let config = DreamCycleConfig {
            replay_enabled: false,
            dream_batch_size: 5,
            dream_min_distance: 0.5,
            validation_confidence_threshold: 0.99, // impossible to meet
            ..Default::default()
        };

        let result = execute_dream_cycle(&db, llm, &config).await.unwrap();

        assert_eq!(result.hypotheses_generated, 1);
        assert_eq!(result.hypotheses_promoted, 0);
        assert_eq!(result.hypotheses_discarded, 1);

        // Verify hypothesis was deleted from semantic store.
        let semantics = db.list_semantics(&SemanticFilter::default()).await.unwrap();
        let inferred = semantics
            .iter()
            .filter(|s| s.knowledge_type == KnowledgeType::Inferred)
            .count();
        assert_eq!(inferred, 0, "discarded hypotheses should be deleted");
    }

    // ── Unit tests for internal functions ────────────────────────────

    /// Identical, orthogonal, and opposite unit vectors score 1, 0, and −1.
    #[test]
    fn test_cosine_similarity() {
        let same = cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]);
        let orthogonal = cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]);
        let opposite = cosine_similarity(&[1.0, 0.0], &[-1.0, 0.0]);

        assert!((same - 1.0).abs() < 1e-6);
        assert!(orthogonal.abs() < 1e-6);
        assert!((opposite + 1.0).abs() < 1e-6);
    }

    /// `find_distant_pairs` keeps only pairs at least `dream_min_distance`
    /// apart and returns them least-similar first.
    ///
    /// With a ≈ c and b orthogonal to a, the similarities are a↔b = 0,
    /// b↔c ≈ 0.10, and a↔c ≈ 0.99. At a minimum distance of 0.5, exactly
    /// a↔b and b↔c qualify, in that order.
    #[test]
    fn test_find_distant_pairs() {
        let agent = AgentId::new("test").unwrap();
        let mk = |concept: &str, emb: Vec<f32>| -> SemanticRecord {
            SemanticRecord::builder()
                .concept(concept)
                .knowledge_type(KnowledgeType::Propositional)
                .description(concept)
                .embedding(emb)
                .confidence(0.8)
                .agent_id(agent.clone())
                .build()
                .unwrap()
        };

        let records = vec![
            mk("a", vec![1.0, 0.0, 0.0]),
            mk("b", vec![0.0, 1.0, 0.0]),
            mk("c", vec![0.99, 0.1, 0.0]), // similar to a
        ];

        let config = DreamCycleConfig {
            dream_min_distance: 0.5,
            ..Default::default()
        };

        let pairs = find_distant_pairs(&records, &config);

        // a↔b and b↔c are distant; a↔c is similar and must be excluded.
        assert_eq!(pairs.len(), 2, "got {} pairs", pairs.len());

        // Least similar first: a↔b (similarity 0) precedes b↔c (≈ 0.10).
        let concepts: Vec<(&str, &str)> = pairs
            .iter()
            .map(|(a, b)| (&*a.concept, &*b.concept))
            .collect();
        assert_eq!(concepts, vec![("a", "b"), ("b", "c")]);
    }

    /// The prompt consists of two messages, and the second (user) message
    /// contains both record descriptions.
    #[test]
    fn test_build_dream_prompt() {
        let agent = AgentId::new("test").unwrap();
        // Neither record needs an embedding: the prompt uses only the
        // description and the knowledge type.
        let a = SemanticRecord::builder()
            .concept("test A")
            .knowledge_type(KnowledgeType::Propositional)
            .description("Description A")
            .confidence(0.8)
            .agent_id(agent.clone())
            .build()
            .unwrap();

        let b = SemanticRecord::builder()
            .concept("test B")
            .knowledge_type(KnowledgeType::Prescriptive)
            .description("Description B")
            .confidence(0.8)
            .agent_id(agent)
            .build()
            .unwrap();

        let messages = build_dream_prompt(&a, &b);
        assert_eq!(messages.len(), 2);
        assert!(messages[1].content.contains("Description A"));
        assert!(messages[1].content.contains("Description B"));
    }

    /// `truncate` cuts long input, passes short input through, and accepts
    /// the empty string.
    #[test]
    fn test_truncate_helper() {
        // (input, max chars, expected prefix)
        let cases = [("hello world", 5, "hello"), ("hi", 10, "hi"), ("", 5, "")];

        for &(input, max, expected) in cases.iter() {
            assert_eq!(truncate(input, max), expected);
        }
    }
}