zeph-memory 0.21.4

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

use std::sync::Arc;
#[allow(unused_imports)]
use zeph_db::sql;

use std::sync::atomic::Ordering;
use tokio_util::sync::CancellationToken;
use zeph_db::DbPool;

pub use zeph_common::config::memory::NoteLinkingConfig;
use zeph_common::sanitize::strip_control_chars;
use zeph_common::text::truncate_to_bytes_ref;
use zeph_llm::any::AnyProvider;
use zeph_llm::provider::LlmProvider as _;

use crate::embedding_store::EmbeddingStore;
use crate::error::MemoryError;
use crate::graph::extractor::ExtractionResult as ExtractorResult;
use crate::vector_store::VectorFilter;

use super::SemanticMemory;

/// Callback type for post-extraction validation.
///
/// A generic predicate opaque to zeph-memory — callers (zeph-core) provide security
/// validation without introducing a dependency on security policy in this crate.
///
/// `None` means no validation is performed. When a validator is present and returns
/// `Err(reason)`, `extract_and_store` logs the reason and returns an empty result without
/// persisting any entity or edge.
pub type PostExtractValidator = Option<Box<dyn Fn(&ExtractorResult) -> Result<(), String> + Send>>;

/// Config for the spawned background extraction task.
///
/// Owned clone of the relevant fields from `GraphConfig` — no references, safe to send to
/// spawned tasks.
#[derive(Debug, Clone)]
pub struct GraphExtractionConfig {
    /// Entity limit handed to `GraphExtractor::new` for a single extraction pass.
    pub max_entities: usize,
    /// Edge limit handed to `GraphExtractor::new` for a single extraction pass.
    pub max_edges: usize,
    /// Timeout, in seconds, wrapped around the whole `extract_and_store` call by the
    /// background task.
    pub extraction_timeout_secs: u64,
    /// Community refresh runs when the persisted extraction count is a non-zero multiple of
    /// this value. `0` disables the refresh entirely.
    pub community_refresh_interval: usize,
    /// Retention window, in days, for expired edges. Read by the community-refresh step
    /// (presumably for graph eviction — confirm against the eviction code).
    pub expired_edge_retention_days: u32,
    /// Upper bound on stored entities. Read by the community-refresh step (presumably for
    /// graph eviction — confirm against the eviction code).
    pub max_entities_cap: usize,
    /// Prompt size budget, in bytes, passed to `detect_communities`.
    pub community_summary_max_prompt_bytes: usize,
    /// Concurrency value passed to `detect_communities`.
    pub community_summary_concurrency: usize,
    /// Edge chunk size passed to `detect_communities`.
    pub lpa_edge_chunk_size: usize,
    /// A-MEM note linking config, cloned from `GraphConfig.note_linking`.
    pub note_linking: NoteLinkingConfig,
    /// A-MEM link weight decay lambda. Range: `(0.0, 1.0]`. Default: `0.95`.
    pub link_weight_decay_lambda: f64,
    /// Seconds between link weight decay passes. Default: `86400`.
    pub link_weight_decay_interval_secs: u64,
    /// Kumiho belief revision: enable semantic contradiction detection for edges.
    pub belief_revision_enabled: bool,
    /// Cosine similarity threshold for belief revision contradiction detection.
    pub belief_revision_similarity_threshold: f32,
    /// GAAMA episode linking: `conversation_id` to link extracted entities to their episode.
    /// `None` disables episode linking for this extraction pass.
    pub conversation_id: Option<i64>,
    /// APEX-MEM: use `insert_or_supersede` instead of `resolve_edge_typed`. Default: `false`.
    pub apex_mem_enabled: bool,
    /// LLM call timeout for extraction, in seconds. Default: `30`.
    pub llm_timeout_secs: u64,
    /// Per-call timeout for every `embed()` invocation, in seconds. Default: `5`.
    pub embed_timeout_secs: u64,
    /// Turn index within the episode for edges inserted during this extraction pass (#3710).
    ///
    /// `None` disables turn-level provenance recording for this pass.
    pub turn_index: Option<u32>,
    /// `MemORAI` write-gate prefilter: minimum confidence for low-signal-relation edges (#3709).
    ///
    /// `None` disables the gate (default behaviour, always writes).
    pub write_gate_min_relevance: Option<f32>,
    /// Benna-Fusi fast-variable learning rate for confidence merges (#4711).
    ///
    /// Passed to [`crate::graph::GraphStore::with_benna_rates`]. Default: `0.5`.
    pub benna_fast_rate: f32,
    /// Benna-Fusi slow-variable learning rate for confidence merges (#4711).
    ///
    /// Passed to [`crate::graph::GraphStore::with_benna_rates`]. Default: `0.05`.
    pub benna_slow_rate: f32,
}

/// Baseline configuration.
///
/// Limits, sizes and the extraction timeout are zeroed here; only the tuning knobs that
/// have a documented default on the struct carry a non-zero value. Callers are presumably
/// expected to overwrite the zeroed fields from `GraphConfig` — confirm at the call sites.
impl Default for GraphExtractionConfig {
    fn default() -> Self {
        Self {
            max_entities: 0,
            max_edges: 0,
            // NOTE(review): a zero timeout makes `run_graph_extraction_task` wrap the
            // extraction in a zero-length `tokio::time::timeout`.
            extraction_timeout_secs: 0,
            // `0` disables the community refresh.
            community_refresh_interval: 0,
            expired_edge_retention_days: 0,
            max_entities_cap: 0,
            community_summary_max_prompt_bytes: 0,
            community_summary_concurrency: 0,
            lpa_edge_chunk_size: 0,
            note_linking: NoteLinkingConfig::default(),
            link_weight_decay_lambda: 0.95,
            link_weight_decay_interval_secs: 86400,
            belief_revision_enabled: false,
            belief_revision_similarity_threshold: 0.85,
            // Episode linking, turn provenance and the write-gate are all off by default.
            conversation_id: None,
            apex_mem_enabled: false,
            llm_timeout_secs: 30,
            embed_timeout_secs: 5,
            turn_index: None,
            write_gate_min_relevance: None,
            benna_fast_rate: 0.5,
            benna_slow_rate: 0.05,
        }
    }
}

/// Stats returned from a completed extraction.
#[derive(Debug, Default)]
pub struct ExtractionStats {
    /// Number of extracted entities that the resolver accepted. Entities whose resolution
    /// failed are skipped and not counted.
    pub entities_upserted: usize,
    /// Number of edges actually written. Deduplicated, gated, unresolved and failed edges
    /// are not counted.
    pub edges_inserted: usize,
}

/// Result returned from `extract_and_store`, combining stats with entity IDs needed for linking.
///
/// The `Default` value (zero stats, no IDs) is what `extract_and_store` returns when the
/// extractor yields nothing or the post-extraction validator rejects the result.
#[derive(Debug, Default)]
pub struct ExtractionResult {
    /// Counters describing what was persisted.
    pub stats: ExtractionStats,
    /// IDs of entities upserted during this extraction pass. Passed to `link_memory_notes`.
    pub entity_ids: Vec<i64>,
}

/// Stats returned from a completed note-linking pass.
#[derive(Debug, Default)]
pub struct LinkingStats {
    /// Number of entities that were embedded and whose similarity search succeeded.
    pub entities_processed: usize,
    /// Number of `similar_to` edges whose insertion returned `Ok`.
    pub edges_created: usize,
}

/// Qdrant collection name for entity embeddings (mirrors the constant in `resolver.rs`).
///
/// Note linking searches this collection for entities similar to the newly extracted ones.
const ENTITY_COLLECTION: &str = "zeph_graph_entities";

/// Mirrors the constant from `graph/resolver/mod.rs` — used for sanitizing APEX-MEM inputs.
///
/// Byte cap applied to both the display and the canonical form of an edge relation.
const MAX_RELATION_BYTES: usize = 256;
/// Mirrors the constant from `graph/resolver/mod.rs` — used for sanitizing APEX-MEM inputs.
///
/// Byte cap applied to the edge fact text.
const MAX_FACT_BYTES: usize = 2048;
/// Fallback confidence used when the LLM omits the `confidence` field in an extracted edge.
const DEFAULT_EDGE_CONFIDENCE: f32 = 0.8;

/// Work item for a single entity during a note-linking pass.
struct EntityWorkItem {
    /// Database ID of the entity being linked.
    entity_id: i64,
    /// Canonical entity name; used in log messages and as the base of `embed_text`.
    canonical_name: String,
    /// Text sent to the embedding provider: `"<name>: <summary>"` when a non-empty summary
    /// exists, otherwise just the canonical name.
    embed_text: String,
    /// The entity's own vector-store point ID, if recorded; used to drop the self-match
    /// from similarity search results.
    self_point_id: Option<String>,
}

/// Link newly extracted entities to semantically similar entities in the graph.
///
/// For each entity in `entity_ids`:
/// 1. Load the entity name + summary from `SQLite`.
/// 2. Embed all entity texts in parallel.
/// 3. Search the entity embedding collection in parallel for the `top_k + 1` most similar points.
/// 4. Filter out the entity itself (by `qdrant_point_id` or `entity_id` payload) and points
///    below `similarity_threshold`.
/// 5. Insert a unidirectional `similar_to` edge where `source_id < target_id` to avoid
///    double-counting in BFS recall while still being traversable via the OR clause in
///    `edges_for_entity`. The edge confidence is set to the cosine similarity score.
/// 6. Deduplicate pairs within a single pass so that a pair encountered from both A→B and B→A
///    directions is only inserted once, keeping `edges_created` accurate.
///
/// Errors are logged and not propagated — this is a best-effort background enrichment step.
pub async fn link_memory_notes(
    entity_ids: &[i64],
    pool: DbPool,
    embedding_store: Arc<EmbeddingStore>,
    provider: AnyProvider,
    cfg: &NoteLinkingConfig,
) -> LinkingStats {
    use crate::graph::GraphStore;

    let store = GraphStore::new(pool);
    let mut stats = LinkingStats::default();

    let work_items = collect_note_link_work_items(entity_ids, &store).await;
    if work_items.is_empty() {
        return stats;
    }

    let valid = embed_work_items(&work_items, &provider, cfg).await;

    let search_limit = cfg.top_k + 1; // +1 to account for self-match
    let search_results = search_similar_for_items(&valid, &embedding_store, search_limit).await;

    insert_similarity_edges(
        &work_items,
        &valid,
        &search_results,
        cfg,
        &store,
        &mut stats,
    )
    .await;

    stats
}

/// Phase 1: turn entity IDs into work items ready for embedding.
///
/// Entities are fetched one at a time so the pass does not compete for pooled connections.
/// IDs that are missing or fail to load are logged at debug level and left out.
async fn collect_note_link_work_items(
    entity_ids: &[i64],
    store: &crate::graph::GraphStore,
) -> Vec<EntityWorkItem> {
    let mut items = Vec::with_capacity(entity_ids.len());

    for &entity_id in entity_ids {
        let entity = match store.find_entity_by_id(entity_id).await {
            Err(e) => {
                tracing::debug!("note_linking: DB error loading entity {entity_id}: {e:#}");
                continue;
            }
            Ok(None) => {
                tracing::debug!("note_linking: entity {entity_id} not found, skipping");
                continue;
            }
            Ok(Some(found)) => found,
        };

        // Prefix the summary with the name when there is one; fall back to the bare name.
        let embed_text = entity
            .summary
            .as_ref()
            .filter(|s| !s.is_empty())
            .map_or_else(
                || entity.canonical_name.clone(),
                |s| format!("{}: {s}", entity.canonical_name),
            );

        items.push(EntityWorkItem {
            entity_id,
            canonical_name: entity.canonical_name,
            embed_text,
            self_point_id: entity.qdrant_point_id,
        });
    }

    items
}

/// Phase 2: embed all entity texts in parallel.
///
/// Returns `(work_idx, embedding)` pairs for successfully embedded items.
/// Items that fail to embed are logged and dropped.
async fn embed_work_items(
    work_items: &[EntityWorkItem],
    provider: &AnyProvider,
    cfg: &NoteLinkingConfig,
) -> Vec<(usize, Vec<f32>)> {
    use futures::future;

    let Ok(embed_results) = tokio::time::timeout(
        std::time::Duration::from_secs(cfg.timeout_secs),
        future::join_all(work_items.iter().map(|w| provider.embed(&w.embed_text))),
    )
    .await
    else {
        tracing::warn!(
            count = work_items.len(),
            "note_linking: batch embed timed out — skipping all entities"
        );
        return Vec::new();
    };
    embed_results
        .into_iter()
        .enumerate()
        .filter_map(|(i, r)| match r {
            Ok(v) => Some((i, v)),
            Err(e) => {
                tracing::debug!(
                    "note_linking: embed failed for entity {:?}: {e:#}",
                    work_items[i].canonical_name
                );
                None
            }
        })
        .collect()
}

/// Phase 3: search the embedding store for similar entities for each embedded work item.
async fn search_similar_for_items(
    valid: &[(usize, Vec<f32>)],
    embedding_store: &EmbeddingStore,
    search_limit: usize,
) -> Vec<Result<Vec<crate::ScoredVectorPoint>, MemoryError>> {
    use futures::future;

    future::join_all(valid.iter().map(|(_, vec)| {
        embedding_store.search_collection(
            ENTITY_COLLECTION,
            vec,
            search_limit,
            None::<VectorFilter>,
        )
    }))
    .await
}

/// Phase 4: insert similarity edges, deduplicating pairs seen from both A→B and B→A.
///
/// Without deduplication, both directions would call `insert_edge` for the same normalised
/// pair and both return `Ok`, inflating `edges_created` by the number of bidirectional hits.
///
/// `valid` and `search_results` are walked in lockstep; `valid[i].0` indexes into
/// `work_items`. A failed search skips that entity. For a successful search the hits are
/// narrowed to real neighbours *before* the `top_k` cap is applied: the entity's own point
/// (matched by point ID or by `entity_id` payload), hits below `similarity_threshold`, and
/// hits without an `entity_id` payload are all discarded first. This way the extra hit
/// requested by the caller to absorb the self-match never costs a genuine neighbour, even
/// when the entity has no recorded point ID.
///
/// Insert failures are logged at debug level and do not abort the pass.
async fn insert_similarity_edges(
    work_items: &[EntityWorkItem],
    valid: &[(usize, Vec<f32>)],
    search_results: &[Result<Vec<crate::ScoredVectorPoint>, MemoryError>],
    cfg: &NoteLinkingConfig,
    store: &crate::graph::GraphStore,
    stats: &mut LinkingStats,
) {
    let mut seen_pairs = std::collections::HashSet::new();

    for ((work_idx, _), search_result) in valid.iter().zip(search_results.iter()) {
        let w = &work_items[*work_idx];

        let results = match search_result {
            Ok(r) => r,
            Err(e) => {
                tracing::debug!(
                    "note_linking: search failed for entity {:?}: {e:#}",
                    w.canonical_name
                );
                continue;
            }
        };

        stats.entities_processed += 1;

        let self_point_id = w.self_point_id.as_deref();
        let self_entity_id = w.entity_id;
        let candidates = results
            .iter()
            .filter(|p| Some(p.id.as_str()) != self_point_id && p.score >= cfg.similarity_threshold)
            .filter_map(|point| {
                let target_id = point
                    .payload
                    .get("entity_id")
                    .and_then(serde_json::Value::as_i64);
                if target_id.is_none() {
                    tracing::debug!(
                        "note_linking: missing entity_id in payload for point {}",
                        point.id
                    );
                }
                target_id.map(|id| (point, id))
            })
            // Secondary self-guard when qdrant_point_id is null. Applied before `take` so
            // the self-match cannot occupy one of the `top_k` slots.
            .filter(|&(_, target_id)| target_id != self_entity_id)
            .take(cfg.top_k);

        for (point, target_id) in candidates {
            // Normalise direction: always store source_id < target_id.
            let (src, tgt) = if w.entity_id < target_id {
                (w.entity_id, target_id)
            } else {
                (target_id, w.entity_id)
            };

            if !seen_pairs.insert((src, tgt)) {
                continue;
            }

            let fact = format!("Semantically similar entities (score: {:.3})", point.score);

            match store
                .insert_edge(src, tgt, "similar_to", &fact, point.score, None)
                .await
            {
                Ok(_) => stats.edges_created += 1,
                Err(e) => {
                    tracing::debug!("note_linking: insert_edge failed: {e:#}");
                }
            }
        }
    }
}

/// Extract entities and edges from `content` and persist them to the graph store.
///
/// This function runs inside a spawned task — it receives owned data only.
///
/// The optional `embedding_store` enables entity embedding storage in Qdrant, which is
/// required for A-MEM note linking to find semantically similar entities across sessions.
///
/// Steps, in order:
/// 1. Bump the persisted extraction counter (this happens even if extraction later fails).
/// 2. Run the LLM extractor over `content` with `context_messages` as context.
/// 3. Run `post_extract_validator`, if any; a rejection yields an empty result.
/// 4. Upsert entities, then insert edges between the entities that resolved.
/// 5. Link the resolved entities to their episode when `config.conversation_id` is set.
///
/// The returned `entity_ids` are sorted and contain each entity at most once, even when
/// several extracted names resolved to the same entity.
///
/// # Errors
///
/// Returns an error if the database query fails or LLM extraction fails.
#[cfg_attr(
    feature = "profiling",
    tracing::instrument(name = "memory.graph_extract", skip_all, fields(entities = tracing::field::Empty, edges = tracing::field::Empty))
)]
pub async fn extract_and_store(
    content: String,
    context_messages: Vec<String>,
    provider: AnyProvider,
    pool: DbPool,
    config: GraphExtractionConfig,
    post_extract_validator: PostExtractValidator,
    embedding_store: Option<Arc<EmbeddingStore>>,
) -> Result<ExtractionResult, MemoryError> {
    use crate::graph::{EntityResolver, GraphExtractor, GraphStore};

    let extractor = GraphExtractor::new(
        provider.clone(),
        config.max_entities,
        config.max_edges,
        config.llm_timeout_secs,
    );
    let ctx_refs: Vec<&str> = context_messages.iter().map(String::as_str).collect();

    let store =
        GraphStore::new(pool).with_benna_rates(config.benna_fast_rate, config.benna_slow_rate);

    bump_extraction_count(store.pool()).await?;

    let Some(result) = extractor.extract(&content, &ctx_refs).await? else {
        return Ok(ExtractionResult::default());
    };

    // Post-extraction validation callback. zeph-memory does not know the callback is a
    // security validator — it is a generic predicate opaque to this crate (design decision D1).
    if let Some(ref validator) = post_extract_validator
        && let Err(reason) = validator(&result)
    {
        tracing::warn!(
            reason,
            "graph extraction validation failed, skipping upsert"
        );
        return Ok(ExtractionResult::default());
    }

    // Without an embedding store the resolver works without embeddings or a provider.
    let resolver = if let Some(ref emb) = embedding_store {
        EntityResolver::new(&store)
            .with_embedding_store(emb)
            .with_provider(&provider)
            .with_embed_timeout(config.embed_timeout_secs)
    } else {
        EntityResolver::new(&store).with_embed_timeout(config.embed_timeout_secs)
    };

    let (entity_name_to_id, entities_upserted) = upsert_entities(&resolver, &result.entities).await;
    let edges_inserted = insert_edges(&resolver, &result.edges, &entity_name_to_id, &config).await;

    #[cfg(any(feature = "sqlite", feature = "postgres"))]
    store.checkpoint_wal().await?;

    // Distinct extracted names can resolve to the same entity, and map iteration order is
    // arbitrary. Sort and dedup so downstream episode linking and note linking handle each
    // entity exactly once, in a deterministic order.
    let mut new_entity_ids: Vec<i64> = entity_name_to_id.into_values().collect();
    new_entity_ids.sort_unstable();
    new_entity_ids.dedup();

    link_episode(&store, &config, &new_entity_ids).await;

    #[cfg(feature = "profiling")]
    {
        let span = tracing::Span::current();
        span.record("entities", entities_upserted);
        span.record("edges", edges_inserted);
    }

    Ok(ExtractionResult {
        stats: ExtractionStats {
            entities_upserted,
            edges_inserted,
        },
        entity_ids: new_entity_ids,
    })
}

/// Increment the extraction counter in `graph_metadata`.
///
/// The counter lives under the key `extraction_count` and is stored as text. Two statements
/// are issued: the first seeds the row with `'0'` if it does not exist yet, the second casts
/// the stored value to an integer, adds one, and writes it back as text.
///
/// # Errors
///
/// Returns an error if either statement fails to execute.
async fn bump_extraction_count(pool: &DbPool) -> Result<(), MemoryError> {
    // Seed the counter row; a no-op when the key is already present.
    zeph_db::query(sql!(
        "INSERT INTO graph_metadata (key, value) VALUES ('extraction_count', '0')
         ON CONFLICT(key) DO NOTHING"
    ))
    .execute(pool)
    .await?;
    // Increment in SQL so the read-modify-write happens inside a single statement.
    zeph_db::query(sql!(
        "UPDATE graph_metadata
         SET value = CAST(CAST(value AS INTEGER) + 1 AS TEXT)
         WHERE key = 'extraction_count'"
    ))
    .execute(pool)
    .await?;
    Ok(())
}

/// Resolve every extracted entity through `resolver` and collect the resulting IDs.
///
/// Returns the map from extracted entity name to resolved entity ID together with the
/// number of successful resolutions. Entities that fail to resolve are logged at debug
/// level and omitted from both.
async fn upsert_entities(
    resolver: &crate::graph::EntityResolver<'_>,
    entities: &[crate::graph::extractor::ExtractedEntity],
) -> (std::collections::HashMap<String, i64>, usize) {
    use std::collections::HashMap;

    let mut ids_by_name: HashMap<String, i64> = HashMap::new();
    let mut resolved_count = 0usize;

    for entity in entities {
        let outcome = resolver
            .resolve(&entity.name, &entity.entity_type, entity.summary.as_deref())
            .await;

        match outcome {
            Err(e) => {
                tracing::debug!("graph: skipping entity {:?}: {e:#}", entity.name);
            }
            Ok((id, _)) => {
                ids_by_name.insert(entity.name.clone(), id);
                resolved_count += 1;
            }
        }
    }

    (ids_by_name, resolved_count)
}

/// Returns `true` when `relation` is a generic, low-information connector.
///
/// Used by the `MemORAI` write-gate to avoid storing vacuous edges (#3709).
///
/// The comparison ignores ASCII case and surrounding whitespace, so LLM output such as
/// `" Related_To\n"` is recognised the same way as `"related_to"`. An empty or
/// whitespace-only relation is not considered low-signal.
fn is_low_signal_relation(relation: &str) -> bool {
    const LOW_SIGNAL: &[&str] = &[
        "related_to",
        "related to",
        "is related to",
        "associated_with",
        "associated with",
        "has",
        "have",
        "is",
        "are",
        "mentions",
        "mentioned",
        "involves",
        "involved",
    ];
    // Relations come straight from the LLM and may carry stray padding; the persistence
    // path trims them too, so the gate must judge the same text that would be stored.
    let relation = relation.trim();
    LOW_SIGNAL.iter().any(|&s| relation.eq_ignore_ascii_case(s))
}

/// Insert extracted edges that have both endpoints in `name_to_id`.
///
/// Returns the number of edges actually inserted.
#[allow(clippy::too_many_lines)]
async fn insert_edges(
    resolver: &crate::graph::EntityResolver<'_>,
    edges: &[crate::graph::extractor::ExtractedEdge],
    name_to_id: &std::collections::HashMap<String, i64>,
    config: &GraphExtractionConfig,
) -> usize {
    let mut edges_inserted = 0usize;
    for edge in edges {
        // MemORAI write-gate: drop low-signal edges below the relevance threshold (#3709).
        if let Some(min_rel) = config.write_gate_min_relevance {
            let conf = edge.confidence.unwrap_or(1.0);
            if conf < min_rel && is_low_signal_relation(&edge.relation) {
                tracing::debug!(
                    relation = %edge.relation,
                    confidence = conf,
                    threshold = min_rel,
                    "write-gate: skipping low-signal edge"
                );
                continue;
            }
        }
        let (Some(&src_id), Some(&tgt_id)) =
            (name_to_id.get(&edge.source), name_to_id.get(&edge.target))
        else {
            tracing::debug!(
                "graph: skipping edge {:?}->{:?}: entity not resolved",
                edge.source,
                edge.target
            );
            continue;
        };
        if src_id == tgt_id {
            tracing::debug!(
                "graph: skipping self-loop edge {:?}->{:?} (entity_id={src_id})",
                edge.source,
                edge.target
            );
            continue;
        }
        // Parse LLM-provided edge_type; default to Semantic on any parse failure so
        // edges are never dropped due to classification errors.
        let edge_type = edge
            .edge_type
            .parse::<crate::graph::EdgeType>()
            .unwrap_or_else(|_| {
                tracing::warn!(
                    raw_type = %edge.edge_type,
                    "graph: unknown edge_type from LLM, defaulting to semantic"
                );
                crate::graph::EdgeType::Semantic
            });
        if config.apex_mem_enabled {
            // APEX-MEM: append-only write path with supersession chains.
            let relation_trimmed = edge.relation.trim();
            let relation_display_clean = strip_control_chars(relation_trimmed);
            let relation_display =
                truncate_to_bytes_ref(&relation_display_clean, MAX_RELATION_BYTES).to_owned();
            let canonical_clean = strip_control_chars(&relation_trimmed.to_lowercase());
            let canonical_relation =
                truncate_to_bytes_ref(&canonical_clean, MAX_RELATION_BYTES).to_owned();
            let fact_clean = strip_control_chars(edge.fact.trim());
            let normalized_fact = truncate_to_bytes_ref(&fact_clean, MAX_FACT_BYTES).to_owned();
            match resolver
                .graph_store()
                .insert_or_supersede_with_turn_index_and_metrics(
                    src_id,
                    tgt_id,
                    &relation_display,
                    &canonical_relation,
                    &normalized_fact,
                    edge.confidence.unwrap_or(DEFAULT_EDGE_CONFIDENCE),
                    None,
                    edge_type,
                    true,
                    None,
                    config.turn_index,
                )
                .await
            {
                Ok(_) => edges_inserted += 1,
                Err(e) => {
                    tracing::debug!("graph: skipping edge (apex): {e:#}");
                }
            }
        } else {
            let belief_cfg =
                config
                    .belief_revision_enabled
                    .then_some(crate::graph::BeliefRevisionConfig {
                        similarity_threshold: config.belief_revision_similarity_threshold,
                    });
            match resolver
                .resolve_edge_typed(
                    src_id,
                    tgt_id,
                    &edge.relation,
                    &edge.fact,
                    edge.confidence.unwrap_or(DEFAULT_EDGE_CONFIDENCE),
                    None,
                    edge_type,
                    belief_cfg.as_ref(),
                )
                .await
            {
                Ok(Some(_)) => edges_inserted += 1,
                Ok(None) => {} // deduplicated
                Err(e) => {
                    tracing::debug!("graph: skipping edge: {e:#}");
                }
            }
        }
    }
    edges_inserted
}

/// Attach the extracted entities to the GAAMA episode of the configured conversation.
///
/// Does nothing when `config.conversation_id` is `None`. A failure to create or look up the
/// episode is logged as a warning and ends the step; a failure to link an individual entity
/// is logged at debug level and the remaining entities are still processed.
async fn link_episode(
    store: &crate::graph::GraphStore,
    config: &GraphExtractionConfig,
    entity_ids: &[i64],
) {
    let Some(conv_id) = config.conversation_id else {
        return;
    };

    let episode_id = match store.ensure_episode(conv_id).await {
        Ok(id) => id,
        Err(e) => {
            tracing::warn!("failed to ensure episode for conversation {conv_id}: {e:#}");
            return;
        }
    };

    for &entity_id in entity_ids {
        let linked = store.link_entity_to_episode(episode_id, entity_id).await;
        if let Err(e) = linked {
            tracing::debug!("episode linking skipped for entity {entity_id}: {e:#}");
        }
    }
}

impl SemanticMemory {
    /// Start graph extraction for a message on a background tokio task and return at once.
    ///
    /// The task applies its own timeout; errors and timeouts are logged inside the task and
    /// never surface to the caller, so the agent response is never blocked.
    ///
    /// `provider_override`, when given, replaces this memory's own provider for the
    /// extraction. `post_extract_validator` runs after extraction and before anything is
    /// persisted; it is a generic predicate opaque to zeph-memory (design decision D1).
    ///
    /// When `config.note_linking.enabled` is `true` and an embedding store is available,
    /// `link_memory_notes` runs after successful extraction inside the same task, bounded
    /// by `config.note_linking.timeout_secs`.
    ///
    /// `cancel` is remembered as the current cancellation token (replacing any previous
    /// one) so that `cancel_graph_extraction` can fire it later.
    ///
    /// # Panics
    ///
    /// Panics if the internal `graph_cancel` mutex is poisoned (another thread panicked
    /// while holding the lock).
    pub fn spawn_graph_extraction(
        &self,
        content: String,
        context_messages: Vec<String>,
        config: GraphExtractionConfig,
        post_extract_validator: PostExtractValidator,
        provider_override: Option<AnyProvider>,
        cancel: CancellationToken,
    ) -> tokio::task::JoinHandle<()> {
        let provider = match provider_override {
            Some(overriding) => {
                tracing::debug!(
                    extract_provider = overriding.name(),
                    "graph extraction using override provider (quality_gate bypassed)"
                );
                overriding
            }
            None => self.provider.clone(),
        };

        // Publish the token first, releasing the lock before the task is spawned.
        {
            let mut current = self
                .graph_cancel
                .lock()
                .expect("graph_cancel mutex poisoned");
            *current = Some(cancel.clone());
        }

        let task_ctx = GraphExtractionTaskCtx {
            pool: self.sqlite.pool().clone(),
            provider,
            failure_counter: self.community_detection_failures.clone(),
            extraction_count: self.graph_extraction_count.clone(),
            extraction_failures: self.graph_extraction_failures.clone(),
            embedding_store: self.qdrant.clone(),
            cancel,
        };

        let task = run_graph_extraction_task(
            content,
            context_messages,
            config,
            post_extract_validator,
            task_ctx,
        );
        tokio::spawn(task)
    }

    /// Request cooperative cancellation of the current background graph-extraction task.
    ///
    /// Fires the [`CancellationToken`] stored by the most recent [`spawn_graph_extraction`]
    /// call. The task checks the token at community-refresh boundaries, so it exits cleanly
    /// rather than being hard-aborted. This should be called before the supervisor calls
    /// `abort()` on the underlying `JoinHandle` to give the task a chance to flush state.
    ///
    /// No-op if no extraction has been spawned or the previous token has already fired.
    ///
    /// # Panics
    ///
    /// Panics if the internal `graph_cancel` mutex is poisoned (another thread panicked
    /// while holding the lock).
    ///
    /// [`spawn_graph_extraction`]: SemanticMemory::spawn_graph_extraction
    pub fn cancel_graph_extraction(&self) {
        let current = self
            .graph_cancel
            .lock()
            .expect("graph_cancel mutex poisoned");

        if let Some(token) = current.as_ref() {
            token.cancel();
        }
    }
}

/// Owned context bundled for the spawned extraction task.
///
/// Bundles the Arcs that must be cloned before entering `tokio::spawn`.
struct GraphExtractionTaskCtx {
    /// Database pool used for extraction, note linking and community refresh.
    pool: DbPool,
    /// LLM/embedding provider for this task (the caller's override, if one was given).
    provider: AnyProvider,
    /// Incremented each time community detection fails.
    failure_counter: Arc<std::sync::atomic::AtomicU64>,
    /// Incremented once per successfully completed extraction.
    extraction_count: Arc<std::sync::atomic::AtomicU64>,
    /// Incremented when an extraction returns an error or times out.
    extraction_failures: Arc<std::sync::atomic::AtomicU64>,
    /// Vector store for entity embeddings; `None` means note linking is skipped.
    embedding_store: Option<Arc<EmbeddingStore>>,
    /// Cancellation signal propagated into background sub-tasks (community refresh).
    cancel: CancellationToken,
}

/// Body of the spawned graph-extraction task.
async fn run_graph_extraction_task(
    content: String,
    context_messages: Vec<String>,
    config: GraphExtractionConfig,
    post_extract_validator: PostExtractValidator,
    ctx: GraphExtractionTaskCtx,
) {
    let timeout_dur = std::time::Duration::from_secs(config.extraction_timeout_secs);
    let extraction_result = tokio::time::timeout(
        timeout_dur,
        extract_and_store(
            content,
            context_messages,
            ctx.provider.clone(),
            ctx.pool.clone(),
            config.clone(),
            post_extract_validator,
            ctx.embedding_store.clone(),
        ),
    )
    .await;

    let (extraction_ok, new_entity_ids) = match extraction_result {
        Ok(Ok(result)) => {
            tracing::debug!(
                entities = result.stats.entities_upserted,
                edges = result.stats.edges_inserted,
                "graph extraction completed"
            );
            ctx.extraction_count.fetch_add(1, Ordering::Relaxed);
            (true, result.entity_ids)
        }
        Ok(Err(e)) => {
            tracing::warn!("graph extraction failed: {e:#}");
            ctx.extraction_failures.fetch_add(1, Ordering::Relaxed);
            (false, vec![])
        }
        Err(_elapsed) => {
            tracing::warn!("graph extraction timed out");
            ctx.extraction_failures.fetch_add(1, Ordering::Relaxed);
            (false, vec![])
        }
    };

    run_note_linking(
        extraction_ok,
        &new_entity_ids,
        ctx.pool.clone(),
        ctx.embedding_store,
        ctx.provider.clone(),
        &config,
    )
    .await;

    maybe_refresh_communities(
        extraction_ok,
        ctx.pool,
        ctx.provider,
        ctx.failure_counter,
        &config,
        ctx.cancel,
    )
    .await;
}

/// Run the A-MEM note-linking pass that follows a successful extraction.
///
/// The pass is skipped when extraction failed, note linking is disabled, no entities were
/// produced, or no embedding store is available. Otherwise `link_memory_notes` runs under
/// `config.note_linking.timeout_secs`; hitting the deadline is only logged, and edges
/// written before it remain in place.
async fn run_note_linking(
    extraction_ok: bool,
    new_entity_ids: &[i64],
    pool: DbPool,
    embedding_store: Option<Arc<EmbeddingStore>>,
    provider: AnyProvider,
    config: &GraphExtractionConfig,
) {
    if !(extraction_ok && config.note_linking.enabled) || new_entity_ids.is_empty() {
        return;
    }
    let Some(store) = embedding_store else {
        return;
    };

    let budget = std::time::Duration::from_secs(config.note_linking.timeout_secs);
    let pass = link_memory_notes(new_entity_ids, pool, store, provider, &config.note_linking);

    if let Ok(stats) = tokio::time::timeout(budget, pass).await {
        tracing::debug!(
            entities_processed = stats.entities_processed,
            edges_created = stats.edges_created,
            "note linking completed"
        );
    } else {
        tracing::debug!("note linking timed out (partial edges may exist)");
    }
}

/// Trigger community detection, graph eviction, and link-weight decay when the extraction
/// count hits the configured refresh interval.
///
/// Runs inline within the caller's task (no nested `tokio::spawn`). Each long-running step
/// is guarded by a `biased` `tokio::select!` on `cancel`: the cancellation arm is always
/// polled first, so a token that is already cancelled skips the step without ever polling
/// it, and a cancellation arriving mid-step aborts it at its next yield point.
///
/// Returns without doing any work when `extraction_ok` is `false`, when
/// `config.community_refresh_interval` is `0`, or when the stored extraction count is `0`
/// (a failed read is treated as `0`) or is not a multiple of the interval.
///
/// Step failures are logged and never propagated. Only a community detection failure
/// increments `failure_counter`.
async fn maybe_refresh_communities(
    extraction_ok: bool,
    pool: DbPool,
    provider: AnyProvider,
    failure_counter: Arc<std::sync::atomic::AtomicU64>,
    config: &GraphExtractionConfig,
    cancel: CancellationToken,
) {
    use crate::graph::GraphStore;

    if !extraction_ok || config.community_refresh_interval == 0 {
        return;
    }

    // One store handle serves both the trigger check and every maintenance step below.
    let store = GraphStore::new(pool);
    let extraction_count = store.extraction_count().await.unwrap_or(0);
    if extraction_count == 0
        || !i64::try_from(config.community_refresh_interval)
            .is_ok_and(|interval| extraction_count % interval == 0)
    {
        return;
    }

    tracing::info!(extraction_count, "triggering community detection refresh");

    tokio::select! {
        // `biased` polls the arms top to bottom, so cancellation always wins over the work.
        biased;
        () = cancel.cancelled() => {
            tracing::debug!("community refresh cancelled before community detection");
            return;
        }
        result = crate::graph::community::detect_communities(
            &store,
            &provider,
            config.community_summary_max_prompt_bytes,
            config.community_summary_concurrency,
            config.lpa_edge_chunk_size,
        ) => {
            match result {
                Ok(count) => {
                    tracing::info!(communities = count, "community detection complete");
                }
                Err(e) => {
                    tracing::warn!("community detection failed: {e:#}");
                    failure_counter.fetch_add(1, Ordering::Relaxed);
                }
            }
        }
    }

    tokio::select! {
        biased;
        () = cancel.cancelled() => {
            tracing::debug!("community refresh cancelled before graph eviction");
            return;
        }
        result = crate::graph::community::run_graph_eviction(
            &store,
            config.expired_edge_retention_days,
            config.max_entities_cap,
        ) => {
            match result {
                Ok(stats) => {
                    tracing::info!(
                        expired_edges = stats.expired_edges_deleted,
                        orphan_entities = stats.orphan_entities_deleted,
                        capped_entities = stats.capped_entities_deleted,
                        "graph eviction complete"
                    );
                }
                Err(e) => {
                    tracing::warn!("graph eviction failed: {e:#}");
                }
            }
        }
    }

    // Time-based link weight decay — independent of eviction cycle.
    let decay_lambda = config.link_weight_decay_lambda;
    let decay_interval_secs = config.link_weight_decay_interval_secs;
    if decay_lambda > 0.0 && decay_interval_secs > 0 {
        // A clock set before the Unix epoch yields 0 here.
        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |d| d.as_secs());
        // A missing, unreadable, or unparsable timestamp counts as "never decayed" (0).
        let last_decay = store
            .get_metadata("last_link_weight_decay_at")
            .await
            .ok()
            .flatten()
            .and_then(|s| s.parse::<u64>().ok())
            .unwrap_or(0);
        if now_secs.saturating_sub(last_decay) >= decay_interval_secs {
            tokio::select! {
                biased;
                () = cancel.cancelled() => {
                    tracing::debug!("community refresh cancelled before link weight decay");
                }
                result = store.decay_edge_retrieval_counts(decay_lambda, decay_interval_secs) => {
                    match result {
                        Ok(affected) => {
                            tracing::info!(affected, "link weight decay applied");
                            // If the timestamp is not persisted, the next refresh sees a stale
                            // `last_decay` and applies decay again — surface that instead of
                            // discarding the error.
                            if let Err(e) = store
                                .set_metadata("last_link_weight_decay_at", &now_secs.to_string())
                                .await
                            {
                                tracing::warn!(
                                    "failed to persist link weight decay timestamp: {e:#}"
                                );
                            }
                        }
                        Err(e) => {
                            tracing::warn!("link weight decay failed: {e:#}");
                        }
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use zeph_llm::any::AnyProvider;

    use super::{NoteLinkingConfig, extract_and_store};
    use crate::embedding_store::EmbeddingStore;
    use crate::graph::GraphStore;
    use crate::in_memory_store::InMemoryVectorStore;
    use crate::store::SqliteStore;

    use super::GraphExtractionConfig;

    /// Build an isolated test fixture: a `GraphStore` and an `EmbeddingStore` (backed by an
    /// `InMemoryVectorStore`) that share the pool of one `:memory:` `SqliteStore`.
    async fn setup() -> (GraphStore, Arc<EmbeddingStore>) {
        let sqlite = SqliteStore::new(":memory:").await.unwrap();
        let graph = GraphStore::new(sqlite.pool().clone());
        let vectors = Box::new(InMemoryVectorStore::new());
        let embeddings = EmbeddingStore::with_store(vectors, sqlite.pool().clone());
        (graph, Arc::new(embeddings))
    }

    /// Regression test for #1829: `extract_and_store()` must pass the provider to `EntityResolver`
    /// so that `store_entity_embedding()` is called and `qdrant_point_id` is set in `SQLite`.
    #[tokio::test]
    async fn extract_and_store_sets_qdrant_point_id_when_embedding_store_provided() {
        let (gs, emb) = setup().await;

        // MockProvider: supports embeddings, returns a valid extraction JSON for chat
        let extraction_json = r#"{"entities":[{"name":"Rust","type":"language","summary":"systems language"}],"edges":[]}"#;
        let mut mock =
            zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
        mock.supports_embeddings = true;
        mock.embedding = vec![1.0_f32, 0.0, 0.0, 0.0];
        let provider = AnyProvider::Mock(mock);

        let config = GraphExtractionConfig {
            max_entities: 10,
            max_edges: 10,
            extraction_timeout_secs: 10,
            ..Default::default()
        };

        let result = extract_and_store(
            "Rust is a systems programming language.".to_owned(),
            vec![],
            provider,
            gs.pool().clone(),
            config,
            None,
            Some(emb.clone()),
        )
        .await
        .unwrap();

        assert_eq!(
            result.stats.entities_upserted, 1,
            "one entity should be upserted"
        );

        // The entity must have a qdrant_point_id — this proves store_entity_embedding() was called.
        // Before the fix, EntityResolver was built without a provider, so embed() was never called
        // and qdrant_point_id remained NULL.
        let entity = gs
            .find_entity("rust", crate::graph::EntityType::Language)
            .await
            .unwrap()
            .expect("entity 'rust' must exist in SQLite");

        assert!(
            entity.qdrant_point_id.is_some(),
            "qdrant_point_id must be set when embedding_store + provider are both provided (regression for #1829)"
        );
    }

    /// Without an `embedding_store`, `extract_and_store()` must still upsert the extracted
    /// entities; it simply stores no embeddings, so `qdrant_point_id` stays `None`.
    #[tokio::test]
    async fn extract_and_store_without_embedding_store_still_upserts_entities() {
        let (graph, _unused_embeddings) = setup().await;

        let llm_reply = r#"{"entities":[{"name":"Python","type":"language","summary":"scripting"}],"edges":[]}"#;
        let provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::with_responses(vec![
            llm_reply.to_owned(),
        ]));

        let outcome = extract_and_store(
            "Python is a scripting language.".to_owned(),
            vec![],
            provider,
            graph.pool().clone(),
            GraphExtractionConfig {
                max_entities: 10,
                max_edges: 10,
                extraction_timeout_secs: 10,
                ..Default::default()
            },
            None,
            None, // embedding store deliberately omitted
        )
        .await
        .unwrap();

        assert_eq!(outcome.stats.entities_upserted, 1);

        let stored = graph
            .find_entity("python", crate::graph::EntityType::Language)
            .await
            .unwrap()
            .expect("entity 'python' must exist");

        assert!(
            stored.qdrant_point_id.is_none(),
            "qdrant_point_id must remain None when no embedding_store is provided"
        );
    }

    /// Regression test for #2166: FTS5 entity writes must be visible to a new connection pool
    /// opened after extraction completes. Without `checkpoint_wal()` in `extract_and_store`,
    /// a fresh pool sees stale FTS5 shadow tables and `find_entities_fuzzy` returns empty.
    #[tokio::test]
    async fn extract_and_store_fts5_cross_session_visibility() {
        let file = tempfile::NamedTempFile::new().expect("tempfile");
        let path = file.path().to_str().expect("valid path").to_string();

        // Session A: run extract_and_store on a file DB (not :memory:) so WAL is used.
        {
            let sqlite = crate::store::SqliteStore::new(&path).await.unwrap();
            let extraction_json = r#"{"entities":[{"name":"Ferris","type":"concept","summary":"Rust mascot"}],"edges":[]}"#;
            let mock =
                zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
            let provider = AnyProvider::Mock(mock);
            let config = GraphExtractionConfig {
                max_entities: 10,
                max_edges: 10,
                extraction_timeout_secs: 10,
                ..Default::default()
            };
            extract_and_store(
                "Ferris is the Rust mascot.".to_owned(),
                vec![],
                provider,
                sqlite.pool().clone(),
                config,
                None,
                None,
            )
            .await
            .unwrap();
        }

        // Session B: new pool — FTS5 must see the entity extracted in session A.
        let sqlite_b = crate::store::SqliteStore::new(&path).await.unwrap();
        let gs_b = crate::graph::GraphStore::new(sqlite_b.pool().clone());
        let results = gs_b.find_entities_fuzzy("Ferris", 10).await.unwrap();
        assert!(
            !results.is_empty(),
            "FTS5 cross-session (#2166): entity extracted in session A must be visible in session B"
        );
    }

    /// Regression test for #2215: an edge whose source and target are the same entity must be
    /// dropped silently — the entity is stored, but no edge is counted as inserted.
    #[tokio::test]
    async fn extract_and_store_skips_self_loop_edges() {
        let (graph, _unused_embeddings) = setup().await;

        // The mocked LLM reply holds a single entity plus one edge pointing back at it.
        let llm_reply = r#"{
            "entities":[{"name":"Rust","type":"language","summary":"systems language"}],
            "edges":[{"source":"Rust","target":"Rust","relation":"is","fact":"Rust is Rust","edge_type":"semantic"}]
        }"#;
        let provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::with_responses(vec![
            llm_reply.to_owned(),
        ]));

        let outcome = extract_and_store(
            "Rust is a language.".to_owned(),
            vec![],
            provider,
            graph.pool().clone(),
            GraphExtractionConfig {
                max_entities: 10,
                max_edges: 10,
                extraction_timeout_secs: 10,
                ..Default::default()
            },
            None,
            None,
        )
        .await
        .unwrap();

        let stats = &outcome.stats;
        assert_eq!(stats.entities_upserted, 1);
        assert_eq!(
            stats.edges_inserted, 0,
            "self-loop edge must be rejected (#2215)"
        );
    }

    /// When `apex_mem_enabled = true`, edges must be inserted via `insert_or_supersede`
    /// (the APEX-MEM append-only path) instead of the legacy `resolve_edge_typed` path.
    /// Verifies that edges are still counted as inserted and that the supersession row
    /// is created in the database.
    #[tokio::test]
    async fn apex_mem_path_inserts_edge_via_insert_or_supersede() {
        let (gs, _emb) = setup().await;

        let extraction_json = r#"{
            "entities":[
                {"name":"Alice","type":"person","summary":"a person"},
                {"name":"Bob","type":"person","summary":"another person"}
            ],
            "edges":[
                {"source":"Alice","target":"Bob","relation":"KNOWS","fact":"Alice knows Bob","edge_type":"semantic"}
            ]
        }"#;
        let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
        let provider = AnyProvider::Mock(mock);

        let config = GraphExtractionConfig {
            max_entities: 10,
            max_edges: 10,
            extraction_timeout_secs: 10,
            apex_mem_enabled: true,
            ..Default::default()
        };

        let result = extract_and_store(
            "Alice knows Bob.".to_owned(),
            vec![],
            provider,
            gs.pool().clone(),
            config,
            None,
            None,
        )
        .await
        .unwrap();

        assert_eq!(result.stats.entities_upserted, 2, "two entities expected");
        assert_eq!(
            result.stats.edges_inserted, 1,
            "APEX-MEM path must insert the edge and count it (#3631)"
        );

        // Verify the edge row exists and its relation preserves display casing.
        let alice_id = gs
            .find_entity("alice", crate::graph::EntityType::Person)
            .await
            .unwrap()
            .expect("entity 'alice' must exist")
            .id
            .0;
        let bob_id = gs
            .find_entity("bob", crate::graph::EntityType::Person)
            .await
            .unwrap()
            .expect("entity 'bob' must exist")
            .id
            .0;
        let edges = gs.edges_exact(alice_id, bob_id).await.unwrap();
        assert_eq!(edges.len(), 1, "exactly one edge expected");
        // canonical_relation is lowercased; relation field preserves original casing post-strip
        assert_eq!(
            edges[0].relation, "KNOWS",
            "display relation must preserve original casing"
        );
    }

    /// Regression for #4297: when the batched embed call outlasts the configured timeout
    /// (30 s here), `embed_work_items` must fail open and return an empty Vec.
    ///
    /// The runtime starts with time paused, so the 31 s mock delay and the 30 s timeout
    /// elapse in virtual time and the test does not actually wait.
    #[tokio::test(start_paused = true)]
    async fn embed_work_items_timeout_returns_empty() {
        // Embedding delay just past the 30 s timeout (virtual time).
        let mut mock = zeph_llm::mock::MockProvider::default();
        mock.supports_embeddings = true;
        mock.embed_delay_ms = 31_000;
        let provider = AnyProvider::Mock(mock);

        let cfg = NoteLinkingConfig {
            timeout_secs: 30,
            ..NoteLinkingConfig::default()
        };
        let pending = vec![super::EntityWorkItem {
            entity_id: 1,
            canonical_name: "Alice".to_owned(),
            embed_text: "Alice".to_owned(),
            self_point_id: None,
        }];

        let embedded = super::embed_work_items(&pending, &provider, &cfg).await;
        assert!(
            embedded.is_empty(),
            "embed_work_items must return empty Vec on 30 s timeout (fail-open)"
        );
    }

    /// Regression for #4622: `maybe_refresh_communities` must return immediately when the
    /// `CancellationToken` is already cancelled, without hanging or panicking.
    ///
    /// Before the fix a nested `tokio::spawn` was used with no `CancellationToken`, so shutdown
    /// could not interrupt community detection.  The inline `tokio::select!` path now exits at
    /// the first select arm when the token is pre-cancelled.
    #[tokio::test]
    async fn maybe_refresh_communities_respects_cancelled_token() {
        use tokio_util::sync::CancellationToken;

        use crate::graph::GraphStore;
        use crate::store::SqliteStore;

        let sqlite = SqliteStore::new(":memory:").await.unwrap();
        let pool = sqlite.pool().clone();
        let gs = GraphStore::new(pool.clone());

        // Seed extraction_count=1 so the interval check passes (1 % 1 == 0).
        gs.set_metadata("extraction_count", "1").await.unwrap();

        let config = GraphExtractionConfig {
            community_refresh_interval: 1,
            ..Default::default()
        };

        let cancel = CancellationToken::new();
        cancel.cancel(); // pre-cancelled — all select! arms must short-circuit immediately

        let extraction_json = r#"{"entities":[],"edges":[]}"#;
        let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
        let provider = AnyProvider::Mock(mock);

        let failure_counter = Arc::new(std::sync::atomic::AtomicU64::new(0));

        // Must complete promptly — if the fix regresses and a blocking call is made this will
        // hang forever (caught by tokio::time::timeout in CI or a test runtime timeout).
        super::maybe_refresh_communities(
            true,
            pool,
            provider,
            failure_counter.clone(),
            &config,
            cancel,
        )
        .await;

        assert_eq!(
            failure_counter.load(std::sync::atomic::Ordering::Relaxed),
            0,
            "no failures should be recorded when cancelled before any detection step"
        );
    }

    /// Relations that `is_low_signal_relation` must classify as low-signal, including a
    /// spaced variant and an uppercase variant.
    #[test]
    fn is_low_signal_known_values() {
        let low_signal_cases = [
            ("related_to", "related_to must be low-signal"),
            ("related to", "related to (space) must be low-signal"),
            ("IS", "IS (uppercase) must be low-signal (case-insensitive)"),
            ("mentions", "mentions must be low-signal"),
        ];
        for (relation, why) in low_signal_cases {
            assert!(super::is_low_signal_relation(relation), "{why}");
        }
    }

    /// Specific, informative relations must not be classified as low-signal.
    #[test]
    fn is_low_signal_specific_relations_pass() {
        let specific_cases = [
            ("causes", "causes must NOT be low-signal"),
            ("works_at", "works_at must NOT be low-signal"),
            ("born_in", "born_in must NOT be low-signal"),
        ];
        for (relation, why) in specific_cases {
            assert!(!super::is_low_signal_relation(relation), "{why}");
        }
    }

    /// Regression test for #4711: configured Benna-Fusi rates must produce different
    /// `confidence_fast`/`confidence_slow` values than the hardcoded defaults.
    ///
    /// `extract_and_store` builds `GraphStore::new(pool).with_benna_rates(fast, slow)` using
    /// `config.benna_fast_rate` / `config.benna_slow_rate`.  Before the fix it called
    /// `GraphStore::new(pool)` only, so the configured rates were silently ignored.
    ///
    /// This test calls `GraphStore::insert_edge_typed` directly (bypassing the resolver dedup
    /// layer) to exercise the Benna-Fusi UPDATE path with two confidence levels (0.6 → 0.8).
    ///
    /// Math:
    ///   default (0.5/0.05):  fast = 0.6 + 0.5*(0.8-0.6) = 0.7;  slow ≈ 0.605
    ///   custom  (0.1/0.02):  fast = 0.6 + 0.1*(0.8-0.6) = 0.62; slow ≈ 0.6004
    #[tokio::test]
    async fn extract_and_store_respects_configured_benna_rates() {
        use crate::graph::EdgeType;

        async fn run_two_inserts(fast_rate: f32, slow_rate: f32) -> crate::graph::types::Edge {
            let sqlite = crate::store::SqliteStore::new(":memory:").await.unwrap();
            let pool = sqlite.pool().clone();
            let gs = GraphStore::new(pool).with_benna_rates(fast_rate, slow_rate);

            let alice_id = gs
                .upsert_entity("Alice", "alice", crate::graph::EntityType::Person, None)
                .await
                .unwrap();
            let bob_id = gs
                .upsert_entity("Bob", "bob", crate::graph::EntityType::Person, None)
                .await
                .unwrap();

            // Pass 1: INSERT — seeds confidence_fast = confidence_slow = 0.6.
            gs.insert_edge_typed(
                alice_id.0,
                bob_id.0,
                "knows",
                "Alice knows Bob",
                0.6,
                None,
                EdgeType::Semantic,
            )
            .await
            .unwrap();

            // Pass 2: UPDATE — triggers Benna-Fusi merge with incoming confidence = 0.8.
            gs.insert_edge_typed(
                alice_id.0,
                bob_id.0,
                "knows",
                "Alice knows Bob",
                0.8,
                None,
                EdgeType::Semantic,
            )
            .await
            .unwrap();

            let mut edges = gs.edges_exact(alice_id.0, bob_id.0).await.unwrap();
            assert_eq!(edges.len(), 1, "exactly one active edge expected");
            edges.remove(0)
        }

        let default_edge = run_two_inserts(0.5, 0.05).await;
        let custom_edge = run_two_inserts(0.1, 0.02).await;

        // Different rates → different fast/slow.  Before the fix extract_and_store ignored the
        // config fields; all edges would have been identical regardless of configured rates.
        assert!(
            (default_edge.confidence_fast - custom_edge.confidence_fast).abs() > f32::EPSILON,
            "confidence_fast must differ between default (0.5) and custom (0.1) benna_fast_rate (#4711)"
        );
        assert!(
            (default_edge.confidence_slow - custom_edge.confidence_slow).abs() > f32::EPSILON,
            "confidence_slow must differ between default (0.05) and custom (0.02) benna_slow_rate (#4711)"
        );
        // Higher fast_rate → fast variable grows more aggressively: 0.7 (default) > 0.62 (custom).
        assert!(
            default_edge.confidence_fast > custom_edge.confidence_fast,
            "higher benna_fast_rate must produce a larger confidence_fast after merge"
        );
    }

    /// Regression test for #4723: `extract_and_store` must forward `ExtractedEdge.confidence`
    /// to the graph resolver instead of always using the hardcoded fallback 0.8.
    ///
    /// The LLM JSON sets `confidence: 0.3`. Before the fix, line 628 passed `0.8` unconditionally;
    /// after the fix it passes `edge.confidence.unwrap_or(0.8)` which is `0.3` when present.
    /// A freshly-inserted edge sets `confidence_fast = confidence`, so we compare against 0.3.
    #[tokio::test]
    async fn extract_and_store_forwards_edge_confidence_not_hardcoded_08() {
        use crate::graph::{EntityType, GraphStore};

        let sqlite = crate::store::SqliteStore::new(":memory:").await.unwrap();
        let pool = sqlite.pool().clone();

        // confidence = 0.3 is far enough from 0.8 that float imprecision cannot mask the bug.
        let extraction_json = r#"{
            "entities":[
                {"name":"Alice","type":"person","summary":"person"},
                {"name":"Bob","type":"person","summary":"person"}
            ],
            "edges":[{"source":"Alice","target":"Bob","relation":"knows","fact":"Alice knows Bob","edge_type":"semantic","confidence":0.3}]
        }"#;
        let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
        let provider = AnyProvider::Mock(mock);
        let config = GraphExtractionConfig {
            max_entities: 10,
            max_edges: 10,
            extraction_timeout_secs: 10,
            ..Default::default()
        };

        let result = extract_and_store(
            "Alice knows Bob.".to_owned(),
            vec![],
            provider,
            pool.clone(),
            config,
            None,
            None,
        )
        .await
        .unwrap();

        assert_eq!(result.stats.edges_inserted, 1, "one edge must be inserted");

        let gs = GraphStore::new(pool);
        let alice_id: i64 = gs
            .find_entity("alice", EntityType::Person)
            .await
            .unwrap()
            .expect("alice must exist")
            .id
            .0;
        let bob_id: i64 = gs
            .find_entity("bob", EntityType::Person)
            .await
            .unwrap()
            .expect("bob must exist")
            .id
            .0;

        let mut edges = gs.edges_exact(alice_id, bob_id).await.unwrap();
        assert_eq!(edges.len(), 1, "exactly one active edge expected");
        let edge = edges.remove(0);

        // Before fix: confidence_fast would be ~0.8 (hardcoded); after fix: ~0.3 (from JSON).
        assert!(
            (edge.confidence_fast - 0.3_f32).abs() < 0.01,
            "confidence_fast must be ~0.3 (from ExtractedEdge.confidence), got {} (regression for #4723)",
            edge.confidence_fast
        );
    }

    /// Regression for #4723 (APEX-MEM path): `extract_and_store` must forward
    /// `ExtractedEdge.confidence` to `insert_or_supersede_with_turn_index_and_metrics` instead
    /// of using the hardcoded literal `0.8`.
    #[tokio::test]
    async fn extract_and_store_apex_forwards_edge_confidence_not_hardcoded_08() {
        use crate::graph::{EntityType, GraphStore};

        let sqlite = crate::store::SqliteStore::new(":memory:").await.unwrap();
        let pool = sqlite.pool().clone();

        // confidence = 0.3 is far enough from 0.8 that float imprecision cannot mask the bug.
        let extraction_json = r#"{
            "entities":[
                {"name":"Alice","type":"person","summary":"person"},
                {"name":"Bob","type":"person","summary":"person"}
            ],
            "edges":[{"source":"Alice","target":"Bob","relation":"knows","fact":"Alice knows Bob","edge_type":"semantic","confidence":0.3}]
        }"#;
        let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]);
        let provider = AnyProvider::Mock(mock);
        let config = GraphExtractionConfig {
            max_entities: 10,
            max_edges: 10,
            extraction_timeout_secs: 10,
            apex_mem_enabled: true,
            ..Default::default()
        };

        let result = extract_and_store(
            "Alice knows Bob.".to_owned(),
            vec![],
            provider,
            pool.clone(),
            config,
            None,
            None,
        )
        .await
        .unwrap();

        assert_eq!(result.stats.edges_inserted, 1, "one edge must be inserted");

        let gs = GraphStore::new(pool);
        let alice_id: i64 = gs
            .find_entity("alice", EntityType::Person)
            .await
            .unwrap()
            .expect("alice must exist")
            .id
            .0;
        let bob_id: i64 = gs
            .find_entity("bob", EntityType::Person)
            .await
            .unwrap()
            .expect("bob must exist")
            .id
            .0;

        let mut edges = gs.edges_exact(alice_id, bob_id).await.unwrap();
        assert_eq!(edges.len(), 1, "exactly one active edge expected");
        let edge = edges.remove(0);

        // Before fix: confidence_fast would be ~0.8 (hardcoded); after fix: ~0.3 (from JSON).
        assert!(
            (edge.confidence_fast - 0.3_f32).abs() < 0.01,
            "confidence_fast must be ~0.3 (from ExtractedEdge.confidence), got {} (regression for #4723 APEX path)",
            edge.confidence_fast
        );
    }

    /// Guard that the default Benna rates of `GraphExtractionConfig` stay equal to the rates
    /// `GraphStore::new` is documented here to use (0.5 fast / 0.05 slow).
    ///
    /// Should the two drift apart, code paths that call `with_benna_rates` and those that do
    /// not would silently behave differently.
    #[test]
    fn graph_extraction_config_benna_defaults_match_graph_store_defaults() {
        let defaults = GraphExtractionConfig::default();

        // Expected values mirror GraphStore::new (0.5 / 0.05) and must be kept in sync.
        let fast_drift = (defaults.benna_fast_rate - 0.5_f32).abs();
        let slow_drift = (defaults.benna_slow_rate - 0.05_f32).abs();

        assert!(
            fast_drift < f32::EPSILON,
            "benna_fast_rate default must match GraphStore::new default of 0.5"
        );
        assert!(
            slow_drift < f32::EPSILON,
            "benna_slow_rate default must match GraphStore::new default of 0.05"
        );
    }
}