Skip to main content

tsift_memgraphrag/
lib.rs

1use anyhow::{Context, Result, bail};
2use serde::{Deserialize, Serialize};
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::Path;
5use tsift_core::{GraphEdge, GraphFreshness, GraphNode, GraphProjection, GraphProvenance};
6use tsift_kg::{
7    ChunkingConfig, KgExtractor, KgInputDocument, KgInputKind, OllamaKgExtractor,
8    extract_documents_to_projection,
9};
10use tsift_local_model::ProviderKind;
11use tsift_memory::{
12    DEFAULT_MEMORY_CANDIDATE_LIMIT, MemoryEvent, MemoryReadPolicy, MemoryReadWatermark,
13    estimate_tokens, memory_read_watermark, read_memory_event_candidates, read_memory_events,
14    read_memory_events_with_policy,
15};
16use tsift_sqlite::SqliteGraphStore;
17
/// Contract version string stamped into every `MemoryQueryPlan`.
pub const MEMGRAPHRAG_CONTRACT_VERSION: &str = "tsift-memgraphrag-v1";
/// Provider id reported by the deterministic hash fallback provider.
pub const HASH_SEMANTIC_PROVIDER_ID: &str = "tsift-local-hash-v1";
/// Embedding model id; currently an alias of the hash fallback provider id.
pub const SEMANTIC_EMBEDDING_MODEL: &str = HASH_SEMANTIC_PROVIDER_ID;
/// Extraction model id; currently an alias of the hash fallback provider id.
pub const SEMANTIC_EXTRACTION_MODEL: &str = HASH_SEMANTIC_PROVIDER_ID;

// NOTE(review): presumably the length of the hash-fallback embedding vector;
// its use is outside this part of the file — confirm.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
// Event cap used when traversal rows are appended without an explicit limit.
const DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT: usize = 600;
// Factor by which a requested result limit is widened into a candidate pool.
const MEMORY_RANK_CANDIDATE_MULTIPLIER: usize = 8;
// Fixed node id of the projection-metadata node written with each projection.
const MEMORY_PROJECTION_NODE_ID: &str = "memory_projection:tsift-memory";
27
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct SemanticProviderMetadata {
30    pub provider_id: String,
31    pub provider_kind: ProviderKind,
32    pub extraction_model: String,
33    pub embedding_model: String,
34}
35
36impl SemanticProviderMetadata {
37    pub fn hash_fallback() -> Self {
38        Self {
39            provider_id: HASH_SEMANTIC_PROVIDER_ID.to_string(),
40            provider_kind: ProviderKind::HashFallback,
41            extraction_model: SEMANTIC_EXTRACTION_MODEL.to_string(),
42            embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
43        }
44    }
45}
46
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub struct SemanticProviderInput {
49    pub source_ref: String,
50    pub memory_kind: String,
51    pub label: String,
52    pub text: String,
53    pub semantic_text: String,
54    pub imported_from: String,
55    pub session_id: Option<String>,
56    pub observed_at_unix: Option<i64>,
57}
58
59impl SemanticProviderInput {
60    fn from_event(event: &MemoryEvent, label: String, imported_from: &str) -> Self {
61        let semantic_text = format!("{} {}", label, event.text);
62        Self {
63            source_ref: event.source_ref.clone(),
64            memory_kind: event.kind.as_str().to_string(),
65            label,
66            text: event.text.clone(),
67            semantic_text,
68            imported_from: imported_from.to_string(),
69            session_id: event.session_id.clone(),
70            observed_at_unix: event.observed_at_unix,
71        }
72    }
73}
74
/// One concept proposed by a semantic provider for a memory event.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticConceptCandidate {
    /// Key distinguishing this concept from others of the same event.
    pub stable_key: String,
    /// Display label of the concept.
    pub label: String,
    /// Free-form description of the concept.
    pub detail: String,
    /// Text that is fed to the embedder for this concept.
    pub embedding_text: String,
}

impl SemanticConceptCandidate {
    /// Creates a candidate from its four parts, converting each into a `String`.
    pub fn new(
        stable_key: impl Into<String>,
        label: impl Into<String>,
        detail: impl Into<String>,
        embedding_text: impl Into<String>,
    ) -> Self {
        let stable_key: String = stable_key.into();
        let label: String = label.into();
        let detail: String = detail.into();
        let embedding_text: String = embedding_text.into();
        Self {
            stable_key,
            label,
            detail,
            embedding_text,
        }
    }

    /// Creates a candidate whose stable key is the literal `"primary"`.
    pub fn primary(
        label: impl Into<String>,
        detail: impl Into<String>,
        embedding_text: impl Into<String>,
    ) -> Self {
        Self {
            stable_key: String::from("primary"),
            label: label.into(),
            detail: detail.into(),
            embedding_text: embedding_text.into(),
        }
    }
}
106
/// An embedding vector together with the provider and model that produced it.
#[derive(Debug, Clone, PartialEq)]
pub struct SemanticEmbedding {
    pub provider_id: String,
    pub model: String,
    pub values: Vec<f64>,
}

impl SemanticEmbedding {
    /// Wraps `values` with the ids of the producing provider and model.
    pub fn new(provider_id: impl Into<String>, model: impl Into<String>, values: Vec<f64>) -> Self {
        let provider_id: String = provider_id.into();
        let model: String = model.into();
        Self {
            provider_id,
            model,
            values,
        }
    }

    /// Number of components in the vector.
    pub fn dimensions(&self) -> usize {
        self.values.len()
    }

    /// Renders the vector as comma-separated values with six decimal places.
    pub fn to_property(&self) -> String {
        let mut rendered = String::new();
        for (position, component) in self.values.iter().enumerate() {
            if position > 0 {
                rendered.push(',');
            }
            rendered.push_str(&format!("{component:.6}"));
        }
        rendered
    }
}
135
/// Pluggable source of semantic concepts and embeddings for memory events.
pub trait SemanticProvider {
    /// Describes the provider and the models it reports using.
    fn metadata(&self) -> SemanticProviderMetadata;
    /// Proposes concept candidates for a single memory event input.
    ///
    /// # Errors
    /// Returns an error when the provider cannot produce candidates.
    fn extract_concepts(
        &self,
        input: &SemanticProviderInput,
    ) -> Result<Vec<SemanticConceptCandidate>>;
    /// Embeds `input` text into a vector.
    ///
    /// # Errors
    /// Returns an error when the provider cannot produce an embedding.
    fn embed(&self, input: &str) -> Result<SemanticEmbedding>;
}
144
145#[derive(Debug, Default, Clone, Copy)]
146pub struct HashSemanticProvider;
147
148impl SemanticProvider for HashSemanticProvider {
149    fn metadata(&self) -> SemanticProviderMetadata {
150        SemanticProviderMetadata::hash_fallback()
151    }
152
153    fn extract_concepts(
154        &self,
155        input: &SemanticProviderInput,
156    ) -> Result<Vec<SemanticConceptCandidate>> {
157        Ok(vec![SemanticConceptCandidate::primary(
158            input.label.clone(),
159            "semantic row from tsift-memory hash fallback",
160            input.semantic_text.clone(),
161        )])
162    }
163
164    fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
165        Ok(SemanticEmbedding::new(
166            HASH_SEMANTIC_PROVIDER_ID,
167            SEMANTIC_EMBEDDING_MODEL,
168            semantic_embedding(input),
169        ))
170    }
171}
172
173/// KG-backed semantic provider — routes `extract_concepts` through the
174/// `tsift-kg` extraction pipeline so memgraphrag consumes real model-backed
175/// entity extraction (spec line 27-28 of `specs/local-kg-model.md`).
176///
177/// Generic over `KgExtractor` so tests can drive the pipeline with a stub and
178/// production can use [`OllamaKgExtractor`] (default). The Ollama embedder is
179/// a `#lmlazy` follow-up; `embed` stays on the deterministic hash fallback
180/// until the ollama embedding profile lands, so memgraphrag's vector path
181/// remains online regardless of whether a GPU extractor is available.
182pub struct KgSemanticProvider<E: KgExtractor + ?Sized = OllamaKgExtractor> {
183    extractor: Box<E>,
184}
185
186impl KgSemanticProvider<OllamaKgExtractor> {
187    /// Build an Ollama-backed KG semantic provider for a specific model tag
188    /// (e.g. `hf.co/Qwen/Qwen3-32B-GGUF:Q4_K_M`). Honors `OLLAMA_HOST` via
189    /// the underlying extractor.
190    pub fn ollama(model: impl Into<String>) -> Self {
191        Self {
192            extractor: Box::new(OllamaKgExtractor::new(model)),
193        }
194    }
195}
196
197impl<E: KgExtractor> KgSemanticProvider<E> {
198    /// Build a KG semantic provider over an arbitrary extractor. Tests use this
199    /// to drive the pipeline deterministically without a live model.
200    pub fn with_extractor(extractor: E) -> Self {
201        Self {
202            extractor: Box::new(extractor),
203        }
204    }
205}
206
207impl<E: KgExtractor + ?Sized> SemanticProvider for KgSemanticProvider<E> {
208    fn metadata(&self) -> SemanticProviderMetadata {
209        let kg = self.extractor.metadata();
210        SemanticProviderMetadata {
211            provider_id: kg.provider_id,
212            provider_kind: kg.provider_kind,
213            extraction_model: kg.extraction_model,
214            // Ollama embedder is a #lmlazy follow-up; embed() uses the hash
215            // fallback path, so report its model id honestly here.
216            embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
217        }
218    }
219
220    fn extract_concepts(
221        &self,
222        input: &SemanticProviderInput,
223    ) -> Result<Vec<SemanticConceptCandidate>> {
224        let kind = match input.memory_kind.as_str() {
225            "session" => KgInputKind::Session,
226            "memory" => KgInputKind::Memory,
227            _ => KgInputKind::Source,
228        };
229        let document = KgInputDocument::new(kind, &input.source_ref, &input.text);
230        let report = extract_documents_to_projection(
231            &[document],
232            self.extractor.as_ref(),
233            ChunkingConfig::default(),
234        )
235        .context("tsift-kg extraction pipeline failed")?;
236
237        let mut candidates: Vec<SemanticConceptCandidate> = report
238            .extracted_chunks
239            .iter()
240            .flat_map(|chunk| chunk.payload.entities.iter())
241            .map(|entity| {
242                SemanticConceptCandidate::new(
243                    format!("kg:{}", entity.kind),
244                    entity.label.clone(),
245                    entity
246                        .description
247                        .clone()
248                        .unwrap_or_else(|| entity.kind.clone()),
249                    format!("{} {}", entity.label, entity.kind),
250                )
251            })
252            .collect();
253
254        if candidates.is_empty() {
255            // Degrade gracefully: a model that returned no entities must not
256            // erase the memory row's only concept. Mirror the hash fallback
257            // primary candidate so downstream traversal stays populated.
258            candidates.push(SemanticConceptCandidate::primary(
259                input.label.clone(),
260                "semantic row from tsift-kg (empty extraction)",
261                input.semantic_text.clone(),
262            ));
263        }
264
265        Ok(candidates)
266    }
267
268    fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
269        // Ollama embedding profile is a #lmlazy follow-up; keep the
270        // deterministic hash fallback so the vector path stays online.
271        Ok(SemanticEmbedding::new(
272            HASH_SEMANTIC_PROVIDER_ID,
273            SEMANTIC_EMBEDDING_MODEL,
274            semantic_embedding(input),
275        ))
276    }
277}
278
/// Lists the graph node kinds associated with the memory graph, in a fixed
/// order.
pub fn memory_graph_node_kinds() -> Vec<&'static str> {
    const NODE_KINDS: [&str; 7] = [
        "memory_session",
        "memory_event",
        "session",
        "source_handle",
        "semantic_concept",
        "semantic_vector_handle",
        "memory_projection",
    ];
    NODE_KINDS.to_vec()
}
290
291pub fn project_memory_events(events: &[MemoryEvent]) -> GraphProjection {
292    let mut projection = GraphProjection::default();
293    let mut sessions = BTreeSet::new();
294
295    for event in events {
296        let event_id = event.stable_id();
297        if let Some(session_id) = &event.session_id
298            && sessions.insert(session_id.clone())
299        {
300            projection.nodes.push(
301                GraphNode::new(
302                    format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex()),
303                    "memory_session",
304                    session_id,
305                )
306                .with_property("session_id", session_id)
307                .with_provenance(GraphProvenance::new("tsift-memory", session_id)),
308            );
309        }
310
311        let mut node = GraphNode::new(&event_id, "memory_event", event.kind.as_str())
312            .with_property("event_kind", event.kind.as_str())
313            .with_property("source_ref", &event.source_ref)
314            .with_property("token_estimate", event.token_estimate.to_string())
315            .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref));
316        if let Some(imported_from) = &event.imported_from {
317            node = node.with_property("imported_from", imported_from);
318        }
319        if let Some(imported_id) = &event.imported_id {
320            node = node.with_property("imported_id", imported_id);
321        }
322        projection.nodes.push(node);
323
324        if let Some(session_id) = &event.session_id {
325            let session_node_id =
326                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
327            projection.edges.push(
328                GraphEdge::new(session_node_id, event_id, "records_memory_event")
329                    .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref)),
330            );
331        }
332    }
333
334    projection
335}
336
337#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
338pub struct MemoryDecayConfig {
339    pub half_life_secs: f64,
340    pub lexical_weight: f64,
341    pub recency_weight: f64,
342}
343
344impl Default for MemoryDecayConfig {
345    fn default() -> Self {
346        Self {
347            half_life_secs: 7.0 * 24.0 * 3600.0,
348            lexical_weight: 0.6,
349            recency_weight: 0.4,
350        }
351    }
352}
353
/// A memory event together with the scores assigned by `rank_memory_events`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ScoredMemoryEvent {
    /// Copy of the ranked event.
    pub event: MemoryEvent,
    /// Fraction of query terms found in the event text.
    pub lexical_score: f64,
    /// Half-life decay of the event's age; 0.0 when the event has no timestamp.
    pub recency_score: f64,
    /// Weighted sum of `lexical_score` and `recency_score`.
    pub score: f64,
}
361
/// Splits `query` on every non-alphanumeric character and returns the
/// non-empty pieces, lowercased, in their original order.
fn memory_query_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    for piece in query.split(|ch: char| !ch.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        terms.push(piece.to_lowercase());
    }
    terms
}
369
/// Returns the fraction of `terms` that occur as substrings of the lowercased
/// `text`; 0.0 when there are no terms.
fn memory_lexical_overlap(terms: &[String], text: &str) -> f64 {
    let total = terms.len();
    if total == 0 {
        return 0.0;
    }
    let lowered = text.to_lowercase();
    let mut matched = 0usize;
    for term in terms {
        if lowered.contains(term.as_str()) {
            matched += 1;
        }
    }
    matched as f64 / total as f64
}
381
/// Exponential half-life decay of an event's age.
///
/// Returns `0.5^(age / half_life)`, where `age` is `now_unix - observed_at_unix`
/// in seconds, clamped to be non-negative so future-dated events score 1.0.
/// The half-life is floored at one second. Events without a timestamp score 0.0.
fn memory_recency_decay(observed_at_unix: Option<i64>, now_unix: i64, half_life_secs: f64) -> f64 {
    let Some(observed) = observed_at_unix else {
        return 0.0;
    };
    // Saturating subtraction: timestamps at the extremes of the i64 range
    // would otherwise overflow (a panic in debug builds, a wrapped — and
    // therefore wrong — age in release builds).
    let age_secs = now_unix.saturating_sub(observed).max(0) as f64;
    0.5f64.powf(age_secs / half_life_secs.max(1.0))
}
391
392pub fn rank_memory_events(
393    events: &[MemoryEvent],
394    query: &str,
395    now_unix: i64,
396    config: MemoryDecayConfig,
397    limit: usize,
398) -> Vec<ScoredMemoryEvent> {
399    let terms = memory_query_terms(query);
400    let mut scored: Vec<ScoredMemoryEvent> = events
401        .iter()
402        .map(|event| {
403            let lexical_score = memory_lexical_overlap(&terms, &event.text);
404            let recency_score =
405                memory_recency_decay(event.observed_at_unix, now_unix, config.half_life_secs);
406            let score =
407                config.lexical_weight * lexical_score + config.recency_weight * recency_score;
408            ScoredMemoryEvent {
409                event: event.clone(),
410                lexical_score,
411                recency_score,
412                score,
413            }
414        })
415        .collect();
416    scored.sort_by(|a, b| {
417        b.score
418            .partial_cmp(&a.score)
419            .unwrap_or(std::cmp::Ordering::Equal)
420            .then_with(|| {
421                b.recency_score
422                    .partial_cmp(&a.recency_score)
423                    .unwrap_or(std::cmp::Ordering::Equal)
424            })
425    });
426    scored.truncate(limit);
427    scored
428}
429
430pub fn memory_rank_candidate_limit(limit: usize) -> usize {
431    if limit == 0 {
432        return 0;
433    }
434    limit
435        .saturating_mul(MEMORY_RANK_CANDIDATE_MULTIPLIER)
436        .min(DEFAULT_MEMORY_CANDIDATE_LIMIT.max(limit))
437}
438
439pub fn rank_memory_event_candidates(
440    memory_db: &Path,
441    query: &str,
442    now_unix: i64,
443    config: MemoryDecayConfig,
444    limit: usize,
445) -> Result<Vec<ScoredMemoryEvent>> {
446    let candidate_limit = memory_rank_candidate_limit(limit);
447    let candidates = read_memory_event_candidates(memory_db, query, candidate_limit)?;
448    Ok(rank_memory_events(
449        &candidates,
450        query,
451        now_unix,
452        config,
453        limit,
454    ))
455}
456
/// Description of how a memory query will be executed, as produced by
/// `plan_memory_query`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MemoryQueryPlan {
    /// Set to `MEMGRAPHRAG_CONTRACT_VERSION`.
    pub contract_version: String,
    /// The query text exactly as supplied.
    pub query: String,
    /// Requested number of results.
    pub limit: usize,
    /// Candidate pool size derived from `limit`.
    pub candidate_limit: usize,
    /// Token budget supplied by the caller.
    pub max_tokens: usize,
    /// Token estimate of the query text.
    pub estimated_query_tokens: usize,
    /// Decay configuration for ranking.
    pub decay: MemoryDecayConfig,
    /// Human-readable list of what the query output contains.
    pub output_contract: Vec<String>,
    /// Suggested follow-up CLI commands.
    pub next_commands: Vec<String>,
}
469
470pub fn plan_memory_query(query: &str, limit: usize, max_tokens: usize) -> Result<MemoryQueryPlan> {
471    if query.trim().is_empty() {
472        bail!("memory query must not be empty");
473    }
474    Ok(MemoryQueryPlan {
475        contract_version: MEMGRAPHRAG_CONTRACT_VERSION.to_string(),
476        query: query.to_string(),
477        limit,
478        candidate_limit: memory_rank_candidate_limit(limit),
479        max_tokens,
480        estimated_query_tokens: estimate_tokens(query),
481        decay: MemoryDecayConfig::default(),
482        output_contract: vec![
483            "indexed FTS/recent candidate set capped before ranking".to_string(),
484            "decay-weighted ranked memory_event ids (lexical + recency)".to_string(),
485            "per-event lexical_score, recency_score, and blended score".to_string(),
486            "source_ref handles for expansion".to_string(),
487            "graph node ids for neighborhood projection".to_string(),
488            "token estimates for every returned packet".to_string(),
489        ],
490        next_commands: vec![
491            "tsift memory status . --json".to_string(),
492            "tsift memory project-graph . --json".to_string(),
493            "tsift graph-db --path . --json related '<query>'".to_string(),
494        ],
495    })
496}
497
/// Summary of one memory-to-graph projection run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryGraphProjectReport {
    /// Number of memory events that were read and projected.
    pub events_projected: usize,
    /// Number of nodes in the projection handed to the graph store.
    pub nodes_upserted: usize,
    /// Number of edges in the projection handed to the graph store.
    pub edges_upserted: usize,
    /// The read policy the events were selected with.
    pub read_policy: MemoryReadPolicy,
    /// Source watermark taken from the memory read watermark.
    pub source_watermark: String,
    /// Content hash taken from the memory read watermark.
    pub content_hash: String,
    /// Available-event count taken from the memory read watermark.
    pub events_available: usize,
}
508
509pub fn project_memory_into_graph(
510    memory_db: &Path,
511    graph_db: &Path,
512    limit: usize,
513) -> Result<MemoryGraphProjectReport> {
514    project_memory_into_graph_with_policy(memory_db, graph_db, limit, &MemoryReadPolicy::default())
515}
516
517pub fn project_memory_into_graph_with_policy(
518    memory_db: &Path,
519    graph_db: &Path,
520    limit: usize,
521    read_policy: &MemoryReadPolicy,
522) -> Result<MemoryGraphProjectReport> {
523    let events = read_memory_events_with_policy(memory_db, read_policy, limit)?;
524    let watermark = memory_read_watermark(memory_db, read_policy, limit, &events)?;
525    let mut projection = project_memory_events(&events);
526    append_memory_projection_metadata(&mut projection, &watermark)?;
527    let nodes_upserted = projection.nodes.len();
528    let edges_upserted = projection.edges.len();
529    if let Some(parent) = graph_db.parent() {
530        std::fs::create_dir_all(parent)
531            .with_context(|| format!("create graph db dir {}", parent.display()))?;
532    }
533    let mut store = SqliteGraphStore::open(graph_db)
534        .with_context(|| format!("open graph store {}", graph_db.display()))?;
535    store.upsert_projection(&projection)?;
536    Ok(MemoryGraphProjectReport {
537        events_projected: events.len(),
538        nodes_upserted,
539        edges_upserted,
540        read_policy: read_policy.clone(),
541        source_watermark: watermark.source_watermark,
542        content_hash: watermark.content_hash,
543        events_available: watermark.events_available,
544    })
545}
546
547fn append_memory_projection_metadata(
548    projection: &mut GraphProjection,
549    watermark: &MemoryReadWatermark,
550) -> Result<()> {
551    let mut node = GraphNode::new(
552        MEMORY_PROJECTION_NODE_ID,
553        "memory_projection",
554        "tsift-memory graph projection",
555    )
556    .with_property("handle", MEMORY_PROJECTION_NODE_ID)
557    .with_property("ref_id", "tsift-memory")
558    .with_property("provider", "tsift-memory")
559    .with_property("read_policy", watermark.policy.order.as_str())
560    .with_property("limit", watermark.limit.to_string())
561    .with_property("events_read", watermark.events_read.to_string())
562    .with_property("events_available", watermark.events_available.to_string())
563    .with_property("source_watermark", watermark.source_watermark.clone())
564    .with_property("content_hash", watermark.content_hash.clone())
565    .with_provenance(
566        GraphProvenance::new("tsift-memory", "memory_events")
567            .with_content_hash(watermark.content_hash.clone()),
568    )
569    .with_freshness(GraphFreshness::content_hash(
570        watermark.source_watermark.clone(),
571    ));
572    if let Some(query) = &watermark.policy.query {
573        node = node.with_property("query", query.clone());
574    }
575    if let Some(max_rowid) = watermark.max_rowid {
576        node = node.with_property("max_rowid", max_rowid.to_string());
577    }
578    if let Some(max_observed_at_unix) = watermark.max_observed_at_unix {
579        node = node.with_property("max_observed_at_unix", max_observed_at_unix.to_string());
580    }
581    if let Some(max_created_at_unix) = watermark.max_created_at_unix {
582        node = node.with_property("max_created_at_unix", max_created_at_unix.to_string());
583    }
584    projection.nodes.push(node_with_content_freshness(node)?);
585    Ok(())
586}
587
/// Counts reported by `derive_memory_ontology_graph`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryOntologyGraphReport {
    /// Number of nodes in the derived ontology projection.
    pub type_nodes: usize,
    /// Number of edges in the derived ontology projection.
    pub relations: usize,
}
593
594pub fn derive_memory_ontology_graph(graph_db: &Path) -> Result<MemoryOntologyGraphReport> {
595    if !graph_db.exists() {
596        bail!(
597            "graph store {} does not exist; run `tsift graph-db refresh` or `tsift memory project-graph` first",
598            graph_db.display()
599        );
600    }
601    let mut store = SqliteGraphStore::open(graph_db)
602        .with_context(|| format!("open graph store {}", graph_db.display()))?;
603    let ontology = store.derive_ontology()?;
604    let type_nodes = ontology.nodes.len();
605    let relations = ontology.edges.len();
606    store.upsert_projection(&ontology)?;
607    Ok(MemoryOntologyGraphReport {
608        type_nodes,
609        relations,
610    })
611}
612
613pub fn append_tsift_memory_graph_projection_rows(
614    root: &Path,
615    nodes: &mut Vec<GraphNode>,
616    edges: &mut Vec<GraphEdge>,
617) -> Result<()> {
618    append_tsift_memory_graph_projection_rows_with_limit(
619        root,
620        nodes,
621        edges,
622        DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT,
623    )
624}
625
626pub fn append_tsift_memory_graph_projection_rows_with_limit(
627    root: &Path,
628    nodes: &mut Vec<GraphNode>,
629    edges: &mut Vec<GraphEdge>,
630    event_limit: usize,
631) -> Result<()> {
632    let memory_db = tsift_memory::default_memory_db_path(root);
633    if !memory_db.exists() {
634        return Ok(());
635    }
636    let events = match read_memory_events(&memory_db, event_limit) {
637        Ok(events) => events,
638        Err(_) => return Ok(()),
639    };
640    append_memory_events_as_traversal_rows(root, &events, nodes, edges)
641}
642
643pub fn append_memory_events_as_traversal_rows(
644    root: &Path,
645    events: &[MemoryEvent],
646    nodes: &mut Vec<GraphNode>,
647    edges: &mut Vec<GraphEdge>,
648) -> Result<()> {
649    append_memory_events_as_traversal_rows_with_provider(
650        root,
651        events,
652        nodes,
653        edges,
654        &HashSemanticProvider,
655    )
656}
657
/// Appends traversal rows for `events` to `nodes` and `edges`, using
/// `semantic_provider` for concept extraction and embeddings.
///
/// Per event this emits a `memory_event` node, a `source_handle` node and one
/// `semantic_concept` node per extracted concept. The first event of each
/// session additionally emits a `memory_session` node. Edges are collected in
/// a map keyed by a string triple and appended only after all events have
/// been processed.
///
/// # Errors
/// Fails when the provider's extraction or embedding fails, or when stamping
/// content freshness onto a node or edge fails.
pub fn append_memory_events_as_traversal_rows_with_provider<P: SemanticProvider + ?Sized>(
    root: &Path,
    events: &[MemoryEvent],
    nodes: &mut Vec<GraphNode>,
    edges: &mut Vec<GraphEdge>,
    semantic_provider: &P,
) -> Result<()> {
    if events.is_empty() {
        return Ok(());
    }

    let mut seen_sessions = BTreeSet::new();
    // Edges are staged here and flushed to `edges` at the end of the function.
    let mut edge_map = BTreeMap::<(String, String, String), GraphEdge>::new();
    // Provider metadata is fetched once and reused for every concept.
    let provider_metadata = semantic_provider.metadata();

    for event in events {
        let event_id = event.stable_id();
        let event_key = memory_event_key(event);
        let source_handle = stable_handle("tmemsrc", &event_key);
        let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
        // Events without an import origin are tagged "native".
        let imported_from = event.imported_from.as_deref().unwrap_or("native");

        if let Some(session_id) = &event.session_id {
            let session_handle =
                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
            // Emit the session node only the first time the session is seen;
            // its provenance is therefore that of the first event in it.
            if seen_sessions.insert(session_id.clone()) {
                let session_node = GraphNode::new(
                    session_handle.clone(),
                    "memory_session",
                    truncate_for_compact(session_id, 80),
                )
                .with_property("handle", session_handle.clone())
                .with_property("ref_id", session_id.clone())
                .with_property("session_id", session_id.clone())
                .with_property("provider", "tsift-memory")
                .with_property(
                    "expand",
                    format!(
                        "tsift memory status {} --json",
                        shell_quote(root.to_string_lossy().as_ref())
                    ),
                )
                .with_provenance(provenance.clone());
                nodes.push(node_with_content_freshness(session_node)?);
            }

            // session -> event
            insert_semantic_edge(
                &mut edge_map,
                GraphEdge::new(
                    session_handle.clone(),
                    event_id.clone(),
                    "records_memory_event",
                )
                .with_property("label", "tsift-memory session event")
                .with_provenance(provenance.clone()),
            );
            // session -> source handle
            insert_semantic_edge(
                &mut edge_map,
                GraphEdge::new(
                    session_handle,
                    source_handle.clone(),
                    "records_memory_source",
                )
                .with_property("label", "tsift-memory session source")
                .with_provenance(provenance.clone()),
            );
        }

        let label = memory_event_label(event);
        // The event node is labelled with the event kind.
        let mut event_node = GraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
            .with_property("handle", event_id.clone())
            .with_property("ref_id", event.source_ref.clone())
            .with_property("source_ref", event.source_ref.clone())
            .with_property("provider", "tsift-memory")
            .with_property("memory_kind", event.kind.as_str())
            .with_property("imported_from", imported_from)
            .with_property("text_preview", truncate_for_compact(&event.text, 240))
            .with_property("token_estimate", event.token_estimate.to_string())
            .with_property(
                "expand",
                format!(
                    "tsift memory status {} --json",
                    shell_quote(root.to_string_lossy().as_ref())
                ),
            )
            .with_provenance(provenance.clone());
        if let Some(session_id) = &event.session_id {
            event_node = event_node.with_property("session_id", session_id.clone());
        }
        if let Some(observed_at_unix) = event.observed_at_unix {
            event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
        }
        if let Some(imported_id) = &event.imported_id {
            event_node = event_node.with_property("imported_id", imported_id.clone());
        }
        nodes.push(node_with_content_freshness(event_node)?);

        // The source handle node carries the same properties as the event
        // node but uses the derived label and its own handle.
        let mut source_node = GraphNode::new(source_handle.clone(), "source_handle", label.clone())
            .with_property("handle", source_handle.clone())
            .with_property("ref_id", event.source_ref.clone())
            .with_property("source_ref", event.source_ref.clone())
            .with_property("provider", "tsift-memory")
            .with_property("memory_kind", event.kind.as_str())
            .with_property("imported_from", imported_from)
            .with_property("text_preview", truncate_for_compact(&event.text, 240))
            .with_property("token_estimate", event.token_estimate.to_string())
            .with_property(
                "expand",
                format!(
                    "tsift memory status {} --json",
                    shell_quote(root.to_string_lossy().as_ref())
                ),
            )
            .with_provenance(provenance.clone());
        if let Some(session_id) = &event.session_id {
            source_node = source_node.with_property("session_id", session_id.clone());
        }
        if let Some(observed_at_unix) = event.observed_at_unix {
            source_node =
                source_node.with_property("observed_at_unix", observed_at_unix.to_string());
        }
        if let Some(imported_id) = &event.imported_id {
            source_node = source_node.with_property("imported_id", imported_id.clone());
        }
        nodes.push(node_with_content_freshness(source_node)?);

        // event -> source handle
        insert_semantic_edge(
            &mut edge_map,
            GraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
                .with_property("label", "tsift-memory source projection")
                .with_provenance(provenance.clone()),
        );

        let semantic_input = SemanticProviderInput::from_event(event, label.clone(), imported_from);
        // One semantic_concept node (with its embedding) per extracted concept.
        for (concept_index, concept) in semantic_provider
            .extract_concepts(&semantic_input)?
            .into_iter()
            .enumerate()
        {
            // The handle combines the event key, the concept's position and
            // its stable key.
            let semantic_handle =
                semantic_concept_handle(&event_key, concept_index, &concept.stable_key);
            let embedding = semantic_provider.embed(&concept.embedding_text)?;
            let semantic_node = GraphNode::new(
                semantic_handle.clone(),
                "semantic_concept",
                concept.label.clone(),
            )
            .with_property("handle", semantic_handle.clone())
            .with_property("ref_id", event.source_ref.clone())
            .with_property("detail", concept.detail.clone())
            .with_property("source_ref", event.source_ref.clone())
            .with_property("provider", "tsift-memory")
            .with_property("memory_kind", event.kind.as_str())
            .with_property("imported_from", imported_from)
            .with_property("semantic_provider", provider_metadata.provider_id.clone())
            .with_property(
                "semantic_provider_kind",
                provider_kind_name(&provider_metadata.provider_kind),
            )
            .with_property(
                "semantic_extraction_model",
                provider_metadata.extraction_model.clone(),
            )
            .with_property("semantic_key", concept.stable_key.clone())
            // Embedding provenance comes from the embedding itself, not from
            // the provider metadata.
            .with_property("embedding_provider", embedding.provider_id.clone())
            .with_property("embedding_model", embedding.model.clone())
            .with_property("embedding_dimensions", embedding.dimensions().to_string())
            .with_property("embedding", embedding.to_property())
            .with_property(
                "expand",
                semantic_related_command(root, &concept.label, SemanticRelatedKind::Concept),
            )
            .with_provenance(provenance.clone());
            nodes.push(node_with_content_freshness(semantic_node)?);

            // source handle -> concept
            insert_semantic_edge(
                &mut edge_map,
                GraphEdge::new(
                    source_handle.clone(),
                    semantic_handle.clone(),
                    "mentions_concept",
                )
                .with_property("label", "tsift-memory semantic source")
                .with_property("semantic_provider", provider_metadata.provider_id.clone())
                .with_provenance(provenance.clone()),
            );
        }
    }

    // Flush the staged edges in the map's key order.
    for edge in edge_map.into_values() {
        edges.push(edge_with_content_freshness(edge)?);
    }

    Ok(())
}
853
854fn memory_event_key(event: &MemoryEvent) -> String {
855    match (event.imported_from.as_deref(), event.imported_id.as_deref()) {
856        (Some(imported_from), Some(imported_id)) => {
857            format!("{imported_from}:{imported_id}")
858        }
859        _ => event.stable_id(),
860    }
861}
862
863fn memory_event_label(event: &MemoryEvent) -> String {
864    let first_line = event
865        .text
866        .lines()
867        .map(str::trim)
868        .find(|line| !line.is_empty())
869        .unwrap_or(event.kind.as_str());
870    match event.kind.as_str() {
871        "imported_observation" => {
872            let observation_type = event
873                .metadata
874                .get("observation_type")
875                .map(String::as_str)
876                .unwrap_or("observation");
877            truncate_for_compact(&format!("{observation_type}: {first_line}"), 80)
878        }
879        "imported_session_summary" => truncate_for_compact(&format!("summary: {first_line}"), 80),
880        "imported_user_prompt" => truncate_for_compact(&format!("prompt: {first_line}"), 80),
881        _ => truncate_for_compact(first_line, 80),
882    }
883}
884
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Lengths are counted in `char`s, not bytes, so multi-byte text is never cut
/// in the middle of a code point. Text that already fits is returned trimmed
/// but otherwise unchanged. Longer text is cut and finished with `"..."`, the
/// ellipsis counting towards the budget.
///
/// When `max_chars` is smaller than the ellipsis itself, the ellipsis is
/// dropped and the first `max_chars` characters are returned, so the result
/// never exceeds the requested budget.
fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    let trimmed = input.trim();
    if trimmed.chars().count() <= max_chars {
        return trimmed.to_string();
    }

    let ellipsis_chars = ELLIPSIS.chars().count();
    if max_chars < ellipsis_chars {
        // No room for the marker: a hard cut is the only way to honour the budget.
        return trimmed.chars().take(max_chars).collect();
    }

    let mut shortened: String = trimmed.chars().take(max_chars - ellipsis_chars).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
894
895fn stable_handle(prefix: &str, key: &str) -> String {
896    let mut hasher = blake3::Hasher::new();
897    hasher.update(prefix.as_bytes());
898    hasher.update(&[0]);
899    hasher.update(key.as_bytes());
900    let hex = hasher.finalize().to_hex();
901    format!("{prefix}-{}", &hex[..10])
902}
903
904fn semantic_concept_handle(event_key: &str, index: usize, stable_key: &str) -> String {
905    if index == 0 && stable_key == "primary" {
906        stable_handle("tmemsem", event_key)
907    } else {
908        stable_handle("tmemsem", &format!("{event_key}:{stable_key}"))
909    }
910}
911
912fn provider_kind_name(provider_kind: &ProviderKind) -> &'static str {
913    match provider_kind {
914        ProviderKind::LlamaCpp => "llama.cpp",
915        ProviderKind::Ollama => "ollama",
916        ProviderKind::Vllm => "vllm",
917        ProviderKind::HashFallback => "hash_fallback",
918    }
919}
920
921fn content_hash<T: Serialize>(value: &T) -> Result<String> {
922    let bytes = serde_json::to_vec(value)?;
923    Ok(blake3::hash(&bytes).to_hex().to_string())
924}
925
926fn node_with_content_freshness(mut node: GraphNode) -> Result<GraphNode> {
927    let mut hashable = node.clone();
928    hashable.freshness = None;
929    node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
930    Ok(node)
931}
932
933fn edge_with_content_freshness(mut edge: GraphEdge) -> Result<GraphEdge> {
934    let mut hashable = edge.clone();
935    hashable.freshness = None;
936    edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
937    Ok(edge)
938}
939
/// Kind of item a suggested `tsift semantic` follow-up command searches for.
#[derive(Copy, Clone)]
enum SemanticRelatedKind {
    /// Search for semantic concepts.
    Concept,
}
944
945fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
946    match kind {
947        SemanticRelatedKind::Concept => "concept",
948    }
949}
950
951fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
952    format!(
953        "tsift semantic {} --path {} --kind {} --limit 10",
954        shell_quote(query),
955        shell_quote(root.to_string_lossy().as_ref()),
956        semantic_related_kind_name(kind)
957    )
958}
959
960fn semantic_embedding(input: &str) -> Vec<f64> {
961    let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
962    let mut tokens = traversal_tokens(input);
963    if tokens.is_empty() {
964        let trimmed = input.trim().to_ascii_lowercase();
965        if !trimmed.is_empty() {
966            tokens.insert(trimmed);
967        }
968    }
969
970    for token in tokens {
971        let hash = blake3::hash(token.as_bytes());
972        let bytes = hash.as_bytes();
973        let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
974        let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
975        vector[idx] += sign;
976    }
977
978    let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
979    if norm > 0.0 {
980        for value in &mut vector {
981            *value /= norm;
982        }
983    }
984    vector
985}
986
/// Splits `input` into a sorted set of unique lower-cased tokens.
///
/// A token is a maximal run of ASCII letters and digits. Every other
/// character, including `_`, `-` and all non-ASCII characters, acts as a
/// separator. Runs shorter than three characters are discarded.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for word in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.len() >= 3 {
            tokens.insert(word.to_ascii_lowercase());
        }
    }
    tokens
}
996
997fn insert_semantic_edge(
998    edge_map: &mut BTreeMap<(String, String, String), GraphEdge>,
999    edge: GraphEdge,
1000) {
1001    edge_map
1002        .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
1003        .or_insert(edge);
1004}
1005
/// Wraps `s` in double quotes so it can be pasted into a shell command line.
///
/// If `s` is already enclosed in a matching pair of double or single quotes,
/// that outer pair is removed first. A lone quote character is not a pair and
/// is treated as ordinary content rather than causing a slicing panic.
///
/// Inside the result, every character that keeps a special meaning within
/// POSIX double quotes (`\`, `"`, `$` and backtick) is backslash-escaped, so
/// the quoted text is neither terminated early nor subject to variable or
/// command substitution.
fn shell_quote(s: &str) -> String {
    let unquoted = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            s.strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        })
        .unwrap_or(s);

    let mut quoted = String::with_capacity(unquoted.len() + 2);
    quoted.push('"');
    for ch in unquoted.chars() {
        if matches!(ch, '\\' | '"' | '$' | '`') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
1026
1027#[cfg(test)]
1028mod tests {
1029    use super::*;
1030    use tempfile::TempDir;
1031    use tsift_memory::{MemoryEventKind, MemoryStore, default_memory_db_path};
1032
1033    #[test]
1034    fn project_memory_events_links_events_to_sessions() {
1035        let event = MemoryEvent::new(MemoryEventKind::ResponseSummary, "session.md", "done")
1036            .with_session_id("session-a");
1037        let projection = project_memory_events(&[event]);
1038        assert_eq!(projection.nodes.len(), 2);
1039        assert_eq!(projection.edges.len(), 1);
1040        assert!(
1041            projection
1042                .nodes
1043                .iter()
1044                .any(|node| node.kind == "memory_session")
1045        );
1046        assert!(
1047            projection
1048                .nodes
1049                .iter()
1050                .any(|node| node.kind == "memory_event")
1051        );
1052    }
1053
1054    #[test]
1055    fn rank_memory_events_prefers_recent_relevant_events() {
1056        let now = 1_700_000_000;
1057        let old = MemoryEvent::new(
1058            MemoryEventKind::ResponseSummary,
1059            "old",
1060            "graph retrieval design shipped",
1061        )
1062        .with_observed_at_unix(now - 30 * 24 * 3600);
1063        let recent = MemoryEvent::new(
1064            MemoryEventKind::ResponseSummary,
1065            "recent",
1066            "graph retrieval follow-up",
1067        )
1068        .with_observed_at_unix(now - 60);
1069        let config = MemoryDecayConfig {
1070            half_life_secs: 7.0 * 24.0 * 3600.0,
1071            lexical_weight: 0.5,
1072            recency_weight: 0.5,
1073        };
1074        let ranked = rank_memory_events(&[old, recent], "graph retrieval", now, config, 10);
1075        assert_eq!(ranked[0].event.source_ref, "recent");
1076    }
1077
1078    #[test]
1079    fn rank_memory_events_keeps_lexical_hits_without_timestamp() {
1080        let now = 1_700_000_000;
1081        let event = MemoryEvent::new(
1082            MemoryEventKind::ResponseSummary,
1083            "untimed",
1084            "semantic graph memory",
1085        );
1086        let off_topic_fresh = MemoryEvent::new(
1087            MemoryEventKind::ResponseSummary,
1088            "fresh",
1089            "unrelated build log output",
1090        )
1091        .with_observed_at_unix(now - 10);
1092        let config = MemoryDecayConfig::default();
1093        let ranked = rank_memory_events(
1094            &[event.clone(), off_topic_fresh],
1095            "semantic graph memory",
1096            now,
1097            config,
1098            10,
1099        );
1100        assert_eq!(ranked[0].event.source_ref, event.source_ref);
1101    }
1102
1103    #[test]
1104    fn rank_memory_event_candidates_bounds_db_candidates_before_scoring() {
1105        let dir = TempDir::new().unwrap();
1106        let memory_db = default_memory_db_path(dir.path());
1107        std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
1108        let store = MemoryStore::open_or_create(&memory_db).unwrap();
1109        let now = 1_700_000_000;
1110        for index in 0..40 {
1111            store
1112                .insert_event(
1113                    &MemoryEvent::new(
1114                        MemoryEventKind::ResponseSummary,
1115                        format!("old-{index}"),
1116                        format!("ordinary memory event {index}"),
1117                    )
1118                    .with_observed_at_unix(now - 20_000 - index),
1119                )
1120                .unwrap();
1121        }
1122        store
1123            .insert_event(
1124                &MemoryEvent::new(
1125                    MemoryEventKind::ResponseSummary,
1126                    "needle",
1127                    "semantic needle graph retrieval",
1128                )
1129                .with_observed_at_unix(now - 30_000),
1130            )
1131            .unwrap();
1132        store
1133            .insert_event(
1134                &MemoryEvent::new(
1135                    MemoryEventKind::ResponseSummary,
1136                    "recent",
1137                    "fresh unrelated release note",
1138                )
1139                .with_observed_at_unix(now - 10),
1140            )
1141            .unwrap();
1142
1143        assert_eq!(memory_rank_candidate_limit(2), 16);
1144        let ranked = rank_memory_event_candidates(
1145            &memory_db,
1146            "semantic needle",
1147            now,
1148            MemoryDecayConfig::default(),
1149            2,
1150        )
1151        .unwrap();
1152        assert_eq!(ranked.len(), 2);
1153        assert!(
1154            ranked
1155                .iter()
1156                .any(|scored| scored.event.source_ref == "needle")
1157        );
1158        assert!(
1159            ranked
1160                .iter()
1161                .any(|scored| scored.event.source_ref == "recent")
1162        );
1163    }
1164
1165    #[test]
1166    fn plan_memory_query_carries_default_decay_config() {
1167        let plan = plan_memory_query("graph rag", 5, 1500).unwrap();
1168        assert_eq!(plan.decay, MemoryDecayConfig::default());
1169        assert_eq!(plan.candidate_limit, 40);
1170        assert!(
1171            plan.output_contract
1172                .iter()
1173                .any(|contract| contract.contains("candidate set capped before ranking"))
1174        );
1175        assert!(
1176            plan.next_commands
1177                .iter()
1178                .any(|cmd| cmd.contains("project-graph"))
1179        );
1180    }
1181
1182    #[test]
1183    fn project_memory_into_graph_persists_memory_nodes() {
1184        let dir = TempDir::new().unwrap();
1185        let root = dir.path();
1186        let memory_db = default_memory_db_path(root);
1187        std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
1188
1189        let store = MemoryStore::open_or_create(&memory_db).unwrap();
1190        let mut prompt = MemoryEvent::new(
1191            MemoryEventKind::PromptTarget,
1192            "session.md",
1193            "run the gated backlog items",
1194        );
1195        prompt.session_id = Some("sess-1".to_string());
1196        prompt.observed_at_unix = Some(1_700_000_000);
1197        let mut response = MemoryEvent::new(
1198            MemoryEventKind::ResponseSummary,
1199            "session.md",
1200            "decay weighted retrieval shipped",
1201        );
1202        response.session_id = Some("sess-1".to_string());
1203        response.observed_at_unix = Some(1_700_000_100);
1204        store.insert_event(&prompt).unwrap();
1205        store.insert_event(&response).unwrap();
1206
1207        let graph_db = root.join(".tsift").join("graph.db");
1208        let report = project_memory_into_graph(&memory_db, &graph_db, 100).unwrap();
1209        assert_eq!(report.events_projected, 2);
1210        assert!(
1211            report.nodes_upserted >= 3,
1212            "two events + one session node, got {}",
1213            report.nodes_upserted
1214        );
1215        assert!(
1216            report.edges_upserted >= 2,
1217            "session records each event, got {}",
1218            report.edges_upserted
1219        );
1220
1221        let conn = rusqlite::Connection::open(&graph_db).unwrap();
1222        let memory_events: i64 = conn
1223            .query_row(
1224                "SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_event'",
1225                [],
1226                |row| row.get(0),
1227            )
1228            .unwrap();
1229        assert_eq!(memory_events, 2);
1230        let sessions: i64 = conn
1231            .query_row(
1232                "SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_session'",
1233                [],
1234                |row| row.get(0),
1235            )
1236            .unwrap();
1237        assert_eq!(sessions, 1);
1238    }
1239
1240    #[test]
1241    fn traversal_projection_adds_semantic_memory_rows() {
1242        let dir = TempDir::new().unwrap();
1243        let event = MemoryEvent::new(
1244            MemoryEventKind::ResponseSummary,
1245            "session.md",
1246            "semantic memory graph",
1247        )
1248        .with_session_id("sess-1")
1249        .with_observed_at_unix(1_700_000_000);
1250        let mut nodes = Vec::new();
1251        let mut edges = Vec::new();
1252        append_memory_events_as_traversal_rows(dir.path(), &[event], &mut nodes, &mut edges)
1253            .unwrap();
1254
1255        assert!(nodes.iter().any(|node| node.kind == "memory_event"));
1256        assert!(nodes.iter().any(|node| {
1257            node.kind == "semantic_concept"
1258                && node.properties.get("provider") == Some(&"tsift-memory".to_string())
1259                && node.properties.get("semantic_provider")
1260                    == Some(&HASH_SEMANTIC_PROVIDER_ID.to_string())
1261                && node.properties.get("semantic_provider_kind")
1262                    == Some(&"hash_fallback".to_string())
1263                && node.properties.get("embedding_model")
1264                    == Some(&SEMANTIC_EMBEDDING_MODEL.to_string())
1265                && node.properties.get("embedding_dimensions") == Some(&"32".to_string())
1266        }));
1267        assert!(edges.iter().any(|edge| edge.kind == "mentions_concept"));
1268    }
1269
1270    #[derive(Debug)]
1271    struct FixtureSemanticProvider;
1272
1273    impl SemanticProvider for FixtureSemanticProvider {
1274        fn metadata(&self) -> SemanticProviderMetadata {
1275            SemanticProviderMetadata {
1276                provider_id: "fixture-local-provider".to_string(),
1277                provider_kind: ProviderKind::LlamaCpp,
1278                extraction_model: "fixture-extractor".to_string(),
1279                embedding_model: "fixture-embedder".to_string(),
1280            }
1281        }
1282
1283        fn extract_concepts(
1284            &self,
1285            input: &SemanticProviderInput,
1286        ) -> Result<Vec<SemanticConceptCandidate>> {
1287            assert_eq!(input.source_ref, "session.md");
1288            Ok(vec![SemanticConceptCandidate::new(
1289                "fixture-concept",
1290                "provider extracted concept",
1291                "semantic row from fixture provider",
1292                "provider extracted concept embedding text",
1293            )])
1294        }
1295
1296        fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
1297            assert!(input.contains("embedding text"));
1298            Ok(SemanticEmbedding::new(
1299                "fixture-local-provider",
1300                "fixture-embedder",
1301                vec![1.0, 0.0, -1.0],
1302            ))
1303        }
1304    }
1305
1306    #[test]
1307    fn traversal_projection_uses_injected_semantic_provider() {
1308        let dir = TempDir::new().unwrap();
1309        let event = MemoryEvent::new(
1310            MemoryEventKind::ResponseSummary,
1311            "session.md",
1312            "semantic provider graph",
1313        )
1314        .with_session_id("sess-1")
1315        .with_observed_at_unix(1_700_000_000);
1316        let mut nodes = Vec::new();
1317        let mut edges = Vec::new();
1318        append_memory_events_as_traversal_rows_with_provider(
1319            dir.path(),
1320            &[event],
1321            &mut nodes,
1322            &mut edges,
1323            &FixtureSemanticProvider,
1324        )
1325        .unwrap();
1326
1327        let semantic = nodes
1328            .iter()
1329            .find(|node| node.kind == "semantic_concept")
1330            .expect("expected semantic concept from fixture provider");
1331        assert_eq!(semantic.label, "provider extracted concept");
1332        assert_eq!(
1333            semantic.properties.get("semantic_provider"),
1334            Some(&"fixture-local-provider".to_string())
1335        );
1336        assert_eq!(
1337            semantic.properties.get("semantic_provider_kind"),
1338            Some(&"llama.cpp".to_string())
1339        );
1340        assert_eq!(
1341            semantic.properties.get("semantic_extraction_model"),
1342            Some(&"fixture-extractor".to_string())
1343        );
1344        assert_eq!(
1345            semantic.properties.get("embedding_provider"),
1346            Some(&"fixture-local-provider".to_string())
1347        );
1348        assert_eq!(
1349            semantic.properties.get("embedding_model"),
1350            Some(&"fixture-embedder".to_string())
1351        );
1352        assert_eq!(
1353            semantic.properties.get("embedding_dimensions"),
1354            Some(&"3".to_string())
1355        );
1356        assert_eq!(
1357            semantic.properties.get("embedding"),
1358            Some(&"1.000000,0.000000,-1.000000".to_string())
1359        );
1360        assert!(edges.iter().any(|edge| {
1361            edge.kind == "mentions_concept"
1362                && edge.properties.get("semantic_provider")
1363                    == Some(&"fixture-local-provider".to_string())
1364        }));
1365    }
1366
1367    /// Stub `KgExtractor` returning a canned entity payload so the
1368    /// KgSemanticProvider pipeline can be exercised without a live model.
1369    struct StubKgExtractor {
1370        payload_json: String,
1371        metadata: tsift_kg::KgExtractorMetadata,
1372    }
1373
1374    impl KgExtractor for StubKgExtractor {
1375        fn metadata(&self) -> tsift_kg::KgExtractorMetadata {
1376            self.metadata.clone()
1377        }
1378        fn extract_json(&self, _chunk: &tsift_kg::KgChunk) -> Result<String> {
1379            Ok(self.payload_json.clone())
1380        }
1381    }
1382
1383    fn stub_extractor() -> StubKgExtractor {
1384        StubKgExtractor {
1385            payload_json: r#"{"entities":[
1386                {"id":"e0","label":"tsift-kg","kind":"crate","description":"KG extraction crate","confidence":0.9},
1387                {"id":"e1","label":"OllamaKgExtractor","kind":"struct","confidence":0.8}
1388            ],"relations":[]}"#
1389                .to_string(),
1390            metadata: tsift_kg::KgExtractorMetadata {
1391                provider_id: "stub-kg-provider".to_string(),
1392                provider_kind: ProviderKind::Ollama,
1393                extraction_model: "stub-model".to_string(),
1394            },
1395        }
1396    }
1397
1398    #[test]
1399    fn kg_semantic_provider_metadata_propagates_extractor_metadata() {
1400        let provider = KgSemanticProvider::with_extractor(stub_extractor());
1401        let metadata = provider.metadata();
1402        assert_eq!(metadata.provider_id, "stub-kg-provider");
1403        assert_eq!(metadata.provider_kind, ProviderKind::Ollama);
1404        assert_eq!(metadata.extraction_model, "stub-model");
1405        // Embedder stays on the hash fallback until the #lmlazy ollama embedder
1406        // follow-up ships.
1407        assert_eq!(metadata.embedding_model, HASH_SEMANTIC_PROVIDER_ID);
1408    }
1409
1410    #[test]
1411    fn kg_semantic_provider_extracts_concepts_via_tsift_kg_pipeline() {
1412        let provider = KgSemanticProvider::with_extractor(stub_extractor());
1413        let input = SemanticProviderInput {
1414            source_ref: "session.md".to_string(),
1415            memory_kind: "source".to_string(),
1416            label: "kg row".to_string(),
1417            text: "tsift-kg extracts entities via OllamaKgExtractor.".to_string(),
1418            semantic_text: "kg row tsift-kg extracts entities".to_string(),
1419            imported_from: "test".to_string(),
1420            session_id: None,
1421            observed_at_unix: None,
1422        };
1423        let candidates = provider
1424            .extract_concepts(&input)
1425            .expect("KG pipeline should produce candidates");
1426        assert_eq!(candidates.len(), 2);
1427        assert_eq!(candidates[0].label, "tsift-kg");
1428        assert_eq!(candidates[0].stable_key, "kg:crate");
1429        assert_eq!(candidates[1].label, "OllamaKgExtractor");
1430        assert_eq!(candidates[1].stable_key, "kg:struct");
1431    }
1432
1433    #[test]
1434    fn kg_semantic_provider_falls_back_when_extractor_returns_no_entities() {
1435        let empty_stub = StubKgExtractor {
1436            payload_json: r#"{"entities":[],"relations":[]}"#.to_string(),
1437            metadata: tsift_kg::KgExtractorMetadata {
1438                provider_id: "empty-stub".to_string(),
1439                provider_kind: ProviderKind::Ollama,
1440                extraction_model: "empty-stub-model".to_string(),
1441            },
1442        };
1443        let provider = KgSemanticProvider::with_extractor(empty_stub);
1444        let input = SemanticProviderInput {
1445            source_ref: "session.md".to_string(),
1446            memory_kind: "source".to_string(),
1447            label: "ghost row".to_string(),
1448            text: "no entities here".to_string(),
1449            semantic_text: "ghost row no entities here".to_string(),
1450            imported_from: "test".to_string(),
1451            session_id: None,
1452            observed_at_unix: None,
1453        };
1454        let candidates = provider
1455            .extract_concepts(&input)
1456            .expect("empty extraction must not fail the cycle");
1457        assert_eq!(candidates.len(), 1);
1458        assert_eq!(candidates[0].label, "ghost row");
1459        assert_eq!(candidates[0].detail, "semantic row from tsift-kg (empty extraction)");
1460    }
1461
1462    #[test]
1463    fn kg_semantic_provider_embed_uses_hash_fallback() {
1464        let provider = KgSemanticProvider::with_extractor(stub_extractor());
1465        let embedding = provider.embed("tsift-kg ollama").expect("embed succeeds");
1466        assert_eq!(embedding.provider_id, HASH_SEMANTIC_PROVIDER_ID);
1467        assert_eq!(embedding.model, SEMANTIC_EMBEDDING_MODEL);
1468        assert_eq!(embedding.dimensions(), SEMANTIC_EMBEDDING_DIM);
1469    }
1470
1471    #[test]
1472    fn kg_semantic_provider_drives_traversal_projection() {
1473        // End-to-end: KgSemanticProvider feeds the same traversal path the
1474        // FixtureSemanticProvider test exercises, proving memgraphrag now
1475        // consumes tsift-kg extractors in the real graph-build flow.
1476        let dir = TempDir::new().unwrap();
1477        let event = MemoryEvent::new(
1478            MemoryEventKind::ResponseSummary,
1479            "session.md",
1480            "kg semantic provider traversal",
1481        )
1482        .with_session_id("sess-kg")
1483        .with_observed_at_unix(1_700_000_000);
1484        let mut nodes = Vec::new();
1485        let mut edges = Vec::new();
1486        let provider = KgSemanticProvider::with_extractor(stub_extractor());
1487        append_memory_events_as_traversal_rows_with_provider(
1488            dir.path(),
1489            &[event],
1490            &mut nodes,
1491            &mut edges,
1492            &provider,
1493        )
1494        .unwrap();
1495
1496        let semantic = nodes
1497            .iter()
1498            .find(|node| node.kind == "semantic_concept")
1499            .expect("expected semantic concept from KG provider");
1500        assert_eq!(semantic.label, "tsift-kg");
1501        assert_eq!(
1502            semantic.properties.get("semantic_provider"),
1503            Some(&"stub-kg-provider".to_string())
1504        );
1505        assert_eq!(
1506            semantic.properties.get("semantic_provider_kind"),
1507            Some(&"ollama".to_string())
1508        );
1509        assert_eq!(
1510            semantic.properties.get("semantic_extraction_model"),
1511            Some(&"stub-model".to_string())
1512        );
1513        assert!(edges.iter().any(|edge| edge.kind == "mentions_concept"));
1514    }
1515}