1use anyhow::{Context, Result, bail};
2use serde::{Deserialize, Serialize};
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::Path;
5use tsift_core::{GraphEdge, GraphFreshness, GraphNode, GraphProjection, GraphProvenance};
6use tsift_kg::{
7 ChunkingConfig, KgExtractor, KgInputDocument, KgInputKind, OllamaKgExtractor,
8 extract_documents_to_projection,
9};
10use tsift_local_model::ProviderKind;
11use tsift_memory::{
12 DEFAULT_MEMORY_CANDIDATE_LIMIT, MemoryEvent, MemoryReadPolicy, MemoryReadWatermark,
13 estimate_tokens, memory_read_watermark, read_memory_event_candidates, read_memory_events,
14 read_memory_events_with_policy,
15};
16use tsift_sqlite::SqliteGraphStore;
17
/// Contract version string stamped onto every [`MemoryQueryPlan`].
pub const MEMGRAPHRAG_CONTRACT_VERSION: &str = "tsift-memgraphrag-v1";
/// Provider id reported by the local hash-based semantic fallback.
pub const HASH_SEMANTIC_PROVIDER_ID: &str = "tsift-local-hash-v1";
/// Embedding model name; currently an alias of [`HASH_SEMANTIC_PROVIDER_ID`].
pub const SEMANTIC_EMBEDDING_MODEL: &str = HASH_SEMANTIC_PROVIDER_ID;
/// Extraction model name; currently an alias of [`HASH_SEMANTIC_PROVIDER_ID`].
pub const SEMANTIC_EXTRACTION_MODEL: &str = HASH_SEMANTIC_PROVIDER_ID;

/// Number of components in the vector produced by `semantic_embedding`.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Event limit used by `append_tsift_memory_graph_projection_rows`.
const DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT: usize = 600;
/// Factor by which `memory_rank_candidate_limit` widens the requested limit.
const MEMORY_RANK_CANDIDATE_MULTIPLIER: usize = 8;
/// Node id (and `handle` property) of the `memory_projection` metadata node.
const MEMORY_PROJECTION_NODE_ID: &str = "memory_projection:tsift-memory";
27
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct SemanticProviderMetadata {
30 pub provider_id: String,
31 pub provider_kind: ProviderKind,
32 pub extraction_model: String,
33 pub embedding_model: String,
34}
35
36impl SemanticProviderMetadata {
37 pub fn hash_fallback() -> Self {
38 Self {
39 provider_id: HASH_SEMANTIC_PROVIDER_ID.to_string(),
40 provider_kind: ProviderKind::HashFallback,
41 extraction_model: SEMANTIC_EXTRACTION_MODEL.to_string(),
42 embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
43 }
44 }
45}
46
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub struct SemanticProviderInput {
49 pub source_ref: String,
50 pub memory_kind: String,
51 pub label: String,
52 pub text: String,
53 pub semantic_text: String,
54 pub imported_from: String,
55 pub session_id: Option<String>,
56 pub observed_at_unix: Option<i64>,
57}
58
59impl SemanticProviderInput {
60 fn from_event(event: &MemoryEvent, label: String, imported_from: &str) -> Self {
61 let semantic_text = format!("{} {}", label, event.text);
62 Self {
63 source_ref: event.source_ref.clone(),
64 memory_kind: event.kind.as_str().to_string(),
65 label,
66 text: event.text.clone(),
67 semantic_text,
68 imported_from: imported_from.to_string(),
69 session_id: event.session_id.clone(),
70 observed_at_unix: event.observed_at_unix,
71 }
72 }
73}
74
/// One concept proposed by a semantic provider for a single input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticConceptCandidate {
    /// Key that, together with the event key, determines the concept handle.
    pub stable_key: String,
    /// Display label of the concept.
    pub label: String,
    /// Free-form detail text stored alongside the concept.
    pub detail: String,
    /// Text that is handed to the embedding step.
    pub embedding_text: String,
}

impl SemanticConceptCandidate {
    /// Creates a candidate from anything convertible into owned strings.
    pub fn new(
        stable_key: impl Into<String>,
        label: impl Into<String>,
        detail: impl Into<String>,
        embedding_text: impl Into<String>,
    ) -> Self {
        let stable_key: String = stable_key.into();
        let label: String = label.into();
        let detail: String = detail.into();
        let embedding_text: String = embedding_text.into();
        Self {
            stable_key,
            label,
            detail,
            embedding_text,
        }
    }

    /// Creates the candidate whose `stable_key` is the literal `"primary"`.
    pub fn primary(
        label: impl Into<String>,
        detail: impl Into<String>,
        embedding_text: impl Into<String>,
    ) -> Self {
        let stable_key = "primary";
        Self::new(stable_key, label, detail, embedding_text)
    }
}
106
/// An embedding vector tagged with the provider and model that produced it.
#[derive(Debug, Clone, PartialEq)]
pub struct SemanticEmbedding {
    /// Identifier of the provider that produced the vector.
    pub provider_id: String,
    /// Model name that produced the vector.
    pub model: String,
    /// Vector components.
    pub values: Vec<f64>,
}

impl SemanticEmbedding {
    /// Wraps `values` together with its provider id and model name.
    pub fn new(provider_id: impl Into<String>, model: impl Into<String>, values: Vec<f64>) -> Self {
        let provider_id: String = provider_id.into();
        let model: String = model.into();
        Self {
            provider_id,
            model,
            values,
        }
    }

    /// Number of components in the vector.
    pub fn dimensions(&self) -> usize {
        let Self { values, .. } = self;
        values.len()
    }

    /// Renders the vector as comma-separated components, each with six
    /// digits after the decimal point. An empty vector renders as `""`.
    pub fn to_property(&self) -> String {
        let mut rendered = String::new();
        for (position, component) in self.values.iter().enumerate() {
            if position > 0 {
                rendered.push(',');
            }
            rendered.push_str(&format!("{component:.6}"));
        }
        rendered
    }
}
135
/// Pluggable source of semantic concepts and embeddings for memory events.
///
/// Implementations in this file are [`HashSemanticProvider`] and
/// [`KgSemanticProvider`].
pub trait SemanticProvider {
    /// Describes the provider (id, kind, and model names).
    fn metadata(&self) -> SemanticProviderMetadata;
    /// Proposes concept candidates for one input.
    ///
    /// # Errors
    /// Returns an error when the provider cannot extract concepts.
    fn extract_concepts(
        &self,
        input: &SemanticProviderInput,
    ) -> Result<Vec<SemanticConceptCandidate>>;
    /// Embeds `input` into a vector.
    ///
    /// # Errors
    /// Returns an error when the provider cannot produce an embedding.
    fn embed(&self, input: &str) -> Result<SemanticEmbedding>;
}
144
145#[derive(Debug, Default, Clone, Copy)]
146pub struct HashSemanticProvider;
147
148impl SemanticProvider for HashSemanticProvider {
149 fn metadata(&self) -> SemanticProviderMetadata {
150 SemanticProviderMetadata::hash_fallback()
151 }
152
153 fn extract_concepts(
154 &self,
155 input: &SemanticProviderInput,
156 ) -> Result<Vec<SemanticConceptCandidate>> {
157 Ok(vec![SemanticConceptCandidate::primary(
158 input.label.clone(),
159 "semantic row from tsift-memory hash fallback",
160 input.semantic_text.clone(),
161 )])
162 }
163
164 fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
165 Ok(SemanticEmbedding::new(
166 HASH_SEMANTIC_PROVIDER_ID,
167 SEMANTIC_EMBEDDING_MODEL,
168 semantic_embedding(input),
169 ))
170 }
171}
172
/// Semantic provider that delegates concept extraction to a `tsift-kg`
/// extractor. The extractor type defaults to `OllamaKgExtractor` and may be
/// unsized.
pub struct KgSemanticProvider<E: KgExtractor + ?Sized = OllamaKgExtractor> {
    /// Boxed extractor used by `metadata` and `extract_concepts`.
    extractor: Box<E>,
}
185
186impl KgSemanticProvider<OllamaKgExtractor> {
187 pub fn ollama(model: impl Into<String>) -> Self {
191 Self {
192 extractor: Box::new(OllamaKgExtractor::new(model)),
193 }
194 }
195}
196
197impl<E: KgExtractor> KgSemanticProvider<E> {
198 pub fn with_extractor(extractor: E) -> Self {
201 Self {
202 extractor: Box::new(extractor),
203 }
204 }
205}
206
207impl<E: KgExtractor + ?Sized> SemanticProvider for KgSemanticProvider<E> {
208 fn metadata(&self) -> SemanticProviderMetadata {
209 let kg = self.extractor.metadata();
210 SemanticProviderMetadata {
211 provider_id: kg.provider_id,
212 provider_kind: kg.provider_kind,
213 extraction_model: kg.extraction_model,
214 embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
217 }
218 }
219
220 fn extract_concepts(
221 &self,
222 input: &SemanticProviderInput,
223 ) -> Result<Vec<SemanticConceptCandidate>> {
224 let kind = match input.memory_kind.as_str() {
225 "session" => KgInputKind::Session,
226 "memory" => KgInputKind::Memory,
227 _ => KgInputKind::Source,
228 };
229 let document = KgInputDocument::new(kind, &input.source_ref, &input.text);
230 let report = extract_documents_to_projection(
231 &[document],
232 self.extractor.as_ref(),
233 ChunkingConfig::default(),
234 )
235 .context("tsift-kg extraction pipeline failed")?;
236
237 let mut candidates: Vec<SemanticConceptCandidate> = report
238 .extracted_chunks
239 .iter()
240 .flat_map(|chunk| chunk.payload.entities.iter())
241 .map(|entity| {
242 SemanticConceptCandidate::new(
243 format!("kg:{}", entity.kind),
244 entity.label.clone(),
245 entity
246 .description
247 .clone()
248 .unwrap_or_else(|| entity.kind.clone()),
249 format!("{} {}", entity.label, entity.kind),
250 )
251 })
252 .collect();
253
254 if candidates.is_empty() {
255 candidates.push(SemanticConceptCandidate::primary(
259 input.label.clone(),
260 "semantic row from tsift-kg (empty extraction)",
261 input.semantic_text.clone(),
262 ));
263 }
264
265 Ok(candidates)
266 }
267
268 fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
269 Ok(SemanticEmbedding::new(
272 HASH_SEMANTIC_PROVIDER_ID,
273 SEMANTIC_EMBEDDING_MODEL,
274 semantic_embedding(input),
275 ))
276 }
277}
278
/// Lists the node kind names returned for the memory graph, in a fixed order.
pub fn memory_graph_node_kinds() -> Vec<&'static str> {
    const NODE_KINDS: [&str; 7] = [
        "memory_session",
        "memory_event",
        "session",
        "source_handle",
        "semantic_concept",
        "semantic_vector_handle",
        "memory_projection",
    ];
    NODE_KINDS.to_vec()
}
290
291pub fn project_memory_events(events: &[MemoryEvent]) -> GraphProjection {
292 let mut projection = GraphProjection::default();
293 let mut sessions = BTreeSet::new();
294
295 for event in events {
296 let event_id = event.stable_id();
297 if let Some(session_id) = &event.session_id
298 && sessions.insert(session_id.clone())
299 {
300 projection.nodes.push(
301 GraphNode::new(
302 format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex()),
303 "memory_session",
304 session_id,
305 )
306 .with_property("session_id", session_id)
307 .with_provenance(GraphProvenance::new("tsift-memory", session_id)),
308 );
309 }
310
311 let mut node = GraphNode::new(&event_id, "memory_event", event.kind.as_str())
312 .with_property("event_kind", event.kind.as_str())
313 .with_property("source_ref", &event.source_ref)
314 .with_property("token_estimate", event.token_estimate.to_string())
315 .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref));
316 if let Some(imported_from) = &event.imported_from {
317 node = node.with_property("imported_from", imported_from);
318 }
319 if let Some(imported_id) = &event.imported_id {
320 node = node.with_property("imported_id", imported_id);
321 }
322 projection.nodes.push(node);
323
324 if let Some(session_id) = &event.session_id {
325 let session_node_id =
326 format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
327 projection.edges.push(
328 GraphEdge::new(session_node_id, event_id, "records_memory_event")
329 .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref)),
330 );
331 }
332 }
333
334 projection
335}
336
337#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
338pub struct MemoryDecayConfig {
339 pub half_life_secs: f64,
340 pub lexical_weight: f64,
341 pub recency_weight: f64,
342}
343
344impl Default for MemoryDecayConfig {
345 fn default() -> Self {
346 Self {
347 half_life_secs: 7.0 * 24.0 * 3600.0,
348 lexical_weight: 0.6,
349 recency_weight: 0.4,
350 }
351 }
352}
353
/// A memory event together with the scores computed by `rank_memory_events`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ScoredMemoryEvent {
    /// Copy of the ranked event.
    pub event: MemoryEvent,
    /// Fraction of query terms found in the event text.
    pub lexical_score: f64,
    /// Half-life decay of the event's age; `0.0` when the event has no timestamp.
    pub recency_score: f64,
    /// `lexical_weight * lexical_score + recency_weight * recency_score`.
    pub score: f64,
}
361
/// Splits `query` on every non-alphanumeric character and returns the
/// non-empty pieces lowercased, in order of appearance (duplicates kept).
fn memory_query_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    for piece in query.split(|symbol: char| !symbol.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        terms.push(piece.to_lowercase());
    }
    terms
}
369
/// Returns the fraction of `terms` that occur as substrings of the lowercased
/// `text`. Yields `0.0` when there are no terms.
fn memory_lexical_overlap(terms: &[String], text: &str) -> f64 {
    let total = terms.len();
    if total == 0 {
        return 0.0;
    }
    let lowered = text.to_lowercase();
    let mut matched = 0usize;
    for term in terms {
        if lowered.contains(term.as_str()) {
            matched += 1;
        }
    }
    matched as f64 / total as f64
}
381
/// Exponential half-life decay of an event's age.
///
/// Returns `0.5 ^ (age / half_life)` where `age` is `now_unix - observed`
/// clamped to be non-negative (future timestamps count as age zero) and the
/// half-life is clamped to at least one second. Events without a timestamp
/// score `0.0`.
fn memory_recency_decay(observed_at_unix: Option<i64>, now_unix: i64, half_life_secs: f64) -> f64 {
    let Some(observed) = observed_at_unix else {
        return 0.0;
    };
    // Saturating subtraction: extreme timestamps (e.g. `i64::MIN`) must not
    // overflow, which would panic in builds with overflow checks enabled.
    let age = now_unix.saturating_sub(observed).max(0) as f64;
    0.5f64.powf(age / half_life_secs.max(1.0))
}
391
392pub fn rank_memory_events(
393 events: &[MemoryEvent],
394 query: &str,
395 now_unix: i64,
396 config: MemoryDecayConfig,
397 limit: usize,
398) -> Vec<ScoredMemoryEvent> {
399 let terms = memory_query_terms(query);
400 let mut scored: Vec<ScoredMemoryEvent> = events
401 .iter()
402 .map(|event| {
403 let lexical_score = memory_lexical_overlap(&terms, &event.text);
404 let recency_score =
405 memory_recency_decay(event.observed_at_unix, now_unix, config.half_life_secs);
406 let score =
407 config.lexical_weight * lexical_score + config.recency_weight * recency_score;
408 ScoredMemoryEvent {
409 event: event.clone(),
410 lexical_score,
411 recency_score,
412 score,
413 }
414 })
415 .collect();
416 scored.sort_by(|a, b| {
417 b.score
418 .partial_cmp(&a.score)
419 .unwrap_or(std::cmp::Ordering::Equal)
420 .then_with(|| {
421 b.recency_score
422 .partial_cmp(&a.recency_score)
423 .unwrap_or(std::cmp::Ordering::Equal)
424 })
425 });
426 scored.truncate(limit);
427 scored
428}
429
430pub fn memory_rank_candidate_limit(limit: usize) -> usize {
431 if limit == 0 {
432 return 0;
433 }
434 limit
435 .saturating_mul(MEMORY_RANK_CANDIDATE_MULTIPLIER)
436 .min(DEFAULT_MEMORY_CANDIDATE_LIMIT.max(limit))
437}
438
439pub fn rank_memory_event_candidates(
440 memory_db: &Path,
441 query: &str,
442 now_unix: i64,
443 config: MemoryDecayConfig,
444 limit: usize,
445) -> Result<Vec<ScoredMemoryEvent>> {
446 let candidate_limit = memory_rank_candidate_limit(limit);
447 let candidates = read_memory_event_candidates(memory_db, query, candidate_limit)?;
448 Ok(rank_memory_events(
449 &candidates,
450 query,
451 now_unix,
452 config,
453 limit,
454 ))
455}
456
/// Plan describing how a memory query will be answered; built by
/// `plan_memory_query`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MemoryQueryPlan {
    /// Value of `MEMGRAPHRAG_CONTRACT_VERSION` at planning time.
    pub contract_version: String,
    /// The query text as supplied by the caller.
    pub query: String,
    /// Requested number of results.
    pub limit: usize,
    /// Candidate-set size derived from `limit` by `memory_rank_candidate_limit`.
    pub candidate_limit: usize,
    /// Token budget supplied by the caller.
    pub max_tokens: usize,
    /// Token estimate of the query text.
    pub estimated_query_tokens: usize,
    /// Decay configuration the plan assumes (the default configuration).
    pub decay: MemoryDecayConfig,
    /// Human-readable list of what the query output contains.
    pub output_contract: Vec<String>,
    /// Suggested follow-up CLI commands.
    pub next_commands: Vec<String>,
}
469
470pub fn plan_memory_query(query: &str, limit: usize, max_tokens: usize) -> Result<MemoryQueryPlan> {
471 if query.trim().is_empty() {
472 bail!("memory query must not be empty");
473 }
474 Ok(MemoryQueryPlan {
475 contract_version: MEMGRAPHRAG_CONTRACT_VERSION.to_string(),
476 query: query.to_string(),
477 limit,
478 candidate_limit: memory_rank_candidate_limit(limit),
479 max_tokens,
480 estimated_query_tokens: estimate_tokens(query),
481 decay: MemoryDecayConfig::default(),
482 output_contract: vec![
483 "indexed FTS/recent candidate set capped before ranking".to_string(),
484 "decay-weighted ranked memory_event ids (lexical + recency)".to_string(),
485 "per-event lexical_score, recency_score, and blended score".to_string(),
486 "source_ref handles for expansion".to_string(),
487 "graph node ids for neighborhood projection".to_string(),
488 "token estimates for every returned packet".to_string(),
489 ],
490 next_commands: vec![
491 "tsift memory status . --json".to_string(),
492 "tsift memory project-graph . --json".to_string(),
493 "tsift graph-db --path . --json related '<query>'".to_string(),
494 ],
495 })
496}
497
/// Summary returned after projecting memory events into the graph store.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryGraphProjectReport {
    /// Number of memory events that were read and projected.
    pub events_projected: usize,
    /// Number of nodes in the upserted projection (includes the metadata node).
    pub nodes_upserted: usize,
    /// Number of edges in the upserted projection.
    pub edges_upserted: usize,
    /// Read policy that selected the events.
    pub read_policy: MemoryReadPolicy,
    /// Source watermark reported by the memory read watermark.
    pub source_watermark: String,
    /// Content hash reported by the memory read watermark.
    pub content_hash: String,
    /// Number of events available, as reported by the memory read watermark.
    pub events_available: usize,
}
508
509pub fn project_memory_into_graph(
510 memory_db: &Path,
511 graph_db: &Path,
512 limit: usize,
513) -> Result<MemoryGraphProjectReport> {
514 project_memory_into_graph_with_policy(memory_db, graph_db, limit, &MemoryReadPolicy::default())
515}
516
517pub fn project_memory_into_graph_with_policy(
518 memory_db: &Path,
519 graph_db: &Path,
520 limit: usize,
521 read_policy: &MemoryReadPolicy,
522) -> Result<MemoryGraphProjectReport> {
523 let events = read_memory_events_with_policy(memory_db, read_policy, limit)?;
524 let watermark = memory_read_watermark(memory_db, read_policy, limit, &events)?;
525 let mut projection = project_memory_events(&events);
526 append_memory_projection_metadata(&mut projection, &watermark)?;
527 let nodes_upserted = projection.nodes.len();
528 let edges_upserted = projection.edges.len();
529 if let Some(parent) = graph_db.parent() {
530 std::fs::create_dir_all(parent)
531 .with_context(|| format!("create graph db dir {}", parent.display()))?;
532 }
533 let mut store = SqliteGraphStore::open(graph_db)
534 .with_context(|| format!("open graph store {}", graph_db.display()))?;
535 store.upsert_projection(&projection)?;
536 Ok(MemoryGraphProjectReport {
537 events_projected: events.len(),
538 nodes_upserted,
539 edges_upserted,
540 read_policy: read_policy.clone(),
541 source_watermark: watermark.source_watermark,
542 content_hash: watermark.content_hash,
543 events_available: watermark.events_available,
544 })
545}
546
547fn append_memory_projection_metadata(
548 projection: &mut GraphProjection,
549 watermark: &MemoryReadWatermark,
550) -> Result<()> {
551 let mut node = GraphNode::new(
552 MEMORY_PROJECTION_NODE_ID,
553 "memory_projection",
554 "tsift-memory graph projection",
555 )
556 .with_property("handle", MEMORY_PROJECTION_NODE_ID)
557 .with_property("ref_id", "tsift-memory")
558 .with_property("provider", "tsift-memory")
559 .with_property("read_policy", watermark.policy.order.as_str())
560 .with_property("limit", watermark.limit.to_string())
561 .with_property("events_read", watermark.events_read.to_string())
562 .with_property("events_available", watermark.events_available.to_string())
563 .with_property("source_watermark", watermark.source_watermark.clone())
564 .with_property("content_hash", watermark.content_hash.clone())
565 .with_provenance(
566 GraphProvenance::new("tsift-memory", "memory_events")
567 .with_content_hash(watermark.content_hash.clone()),
568 )
569 .with_freshness(GraphFreshness::content_hash(
570 watermark.source_watermark.clone(),
571 ));
572 if let Some(query) = &watermark.policy.query {
573 node = node.with_property("query", query.clone());
574 }
575 if let Some(max_rowid) = watermark.max_rowid {
576 node = node.with_property("max_rowid", max_rowid.to_string());
577 }
578 if let Some(max_observed_at_unix) = watermark.max_observed_at_unix {
579 node = node.with_property("max_observed_at_unix", max_observed_at_unix.to_string());
580 }
581 if let Some(max_created_at_unix) = watermark.max_created_at_unix {
582 node = node.with_property("max_created_at_unix", max_created_at_unix.to_string());
583 }
584 projection.nodes.push(node_with_content_freshness(node)?);
585 Ok(())
586}
587
/// Summary returned by `derive_memory_ontology_graph`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryOntologyGraphReport {
    /// Number of nodes in the derived ontology projection.
    pub type_nodes: usize,
    /// Number of edges in the derived ontology projection.
    pub relations: usize,
}
593
594pub fn derive_memory_ontology_graph(graph_db: &Path) -> Result<MemoryOntologyGraphReport> {
595 if !graph_db.exists() {
596 bail!(
597 "graph store {} does not exist; run `tsift graph-db refresh` or `tsift memory project-graph` first",
598 graph_db.display()
599 );
600 }
601 let mut store = SqliteGraphStore::open(graph_db)
602 .with_context(|| format!("open graph store {}", graph_db.display()))?;
603 let ontology = store.derive_ontology()?;
604 let type_nodes = ontology.nodes.len();
605 let relations = ontology.edges.len();
606 store.upsert_projection(&ontology)?;
607 Ok(MemoryOntologyGraphReport {
608 type_nodes,
609 relations,
610 })
611}
612
613pub fn append_tsift_memory_graph_projection_rows(
614 root: &Path,
615 nodes: &mut Vec<GraphNode>,
616 edges: &mut Vec<GraphEdge>,
617) -> Result<()> {
618 append_tsift_memory_graph_projection_rows_with_limit(
619 root,
620 nodes,
621 edges,
622 DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT,
623 )
624}
625
626pub fn append_tsift_memory_graph_projection_rows_with_limit(
627 root: &Path,
628 nodes: &mut Vec<GraphNode>,
629 edges: &mut Vec<GraphEdge>,
630 event_limit: usize,
631) -> Result<()> {
632 let memory_db = tsift_memory::default_memory_db_path(root);
633 if !memory_db.exists() {
634 return Ok(());
635 }
636 let events = match read_memory_events(&memory_db, event_limit) {
637 Ok(events) => events,
638 Err(_) => return Ok(()),
639 };
640 append_memory_events_as_traversal_rows(root, &events, nodes, edges)
641}
642
643pub fn append_memory_events_as_traversal_rows(
644 root: &Path,
645 events: &[MemoryEvent],
646 nodes: &mut Vec<GraphNode>,
647 edges: &mut Vec<GraphEdge>,
648) -> Result<()> {
649 append_memory_events_as_traversal_rows_with_provider(
650 root,
651 events,
652 nodes,
653 edges,
654 &HashSemanticProvider,
655 )
656}
657
658pub fn append_memory_events_as_traversal_rows_with_provider<P: SemanticProvider + ?Sized>(
659 root: &Path,
660 events: &[MemoryEvent],
661 nodes: &mut Vec<GraphNode>,
662 edges: &mut Vec<GraphEdge>,
663 semantic_provider: &P,
664) -> Result<()> {
665 if events.is_empty() {
666 return Ok(());
667 }
668
669 let mut seen_sessions = BTreeSet::new();
670 let mut edge_map = BTreeMap::<(String, String, String), GraphEdge>::new();
671 let provider_metadata = semantic_provider.metadata();
672
673 for event in events {
674 let event_id = event.stable_id();
675 let event_key = memory_event_key(event);
676 let source_handle = stable_handle("tmemsrc", &event_key);
677 let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
678 let imported_from = event.imported_from.as_deref().unwrap_or("native");
679
680 if let Some(session_id) = &event.session_id {
681 let session_handle =
682 format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
683 if seen_sessions.insert(session_id.clone()) {
684 let session_node = GraphNode::new(
685 session_handle.clone(),
686 "memory_session",
687 truncate_for_compact(session_id, 80),
688 )
689 .with_property("handle", session_handle.clone())
690 .with_property("ref_id", session_id.clone())
691 .with_property("session_id", session_id.clone())
692 .with_property("provider", "tsift-memory")
693 .with_property(
694 "expand",
695 format!(
696 "tsift memory status {} --json",
697 shell_quote(root.to_string_lossy().as_ref())
698 ),
699 )
700 .with_provenance(provenance.clone());
701 nodes.push(node_with_content_freshness(session_node)?);
702 }
703
704 insert_semantic_edge(
705 &mut edge_map,
706 GraphEdge::new(
707 session_handle.clone(),
708 event_id.clone(),
709 "records_memory_event",
710 )
711 .with_property("label", "tsift-memory session event")
712 .with_provenance(provenance.clone()),
713 );
714 insert_semantic_edge(
715 &mut edge_map,
716 GraphEdge::new(
717 session_handle,
718 source_handle.clone(),
719 "records_memory_source",
720 )
721 .with_property("label", "tsift-memory session source")
722 .with_provenance(provenance.clone()),
723 );
724 }
725
726 let label = memory_event_label(event);
727 let mut event_node = GraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
728 .with_property("handle", event_id.clone())
729 .with_property("ref_id", event.source_ref.clone())
730 .with_property("source_ref", event.source_ref.clone())
731 .with_property("provider", "tsift-memory")
732 .with_property("memory_kind", event.kind.as_str())
733 .with_property("imported_from", imported_from)
734 .with_property("text_preview", truncate_for_compact(&event.text, 240))
735 .with_property("token_estimate", event.token_estimate.to_string())
736 .with_property(
737 "expand",
738 format!(
739 "tsift memory status {} --json",
740 shell_quote(root.to_string_lossy().as_ref())
741 ),
742 )
743 .with_provenance(provenance.clone());
744 if let Some(session_id) = &event.session_id {
745 event_node = event_node.with_property("session_id", session_id.clone());
746 }
747 if let Some(observed_at_unix) = event.observed_at_unix {
748 event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
749 }
750 if let Some(imported_id) = &event.imported_id {
751 event_node = event_node.with_property("imported_id", imported_id.clone());
752 }
753 nodes.push(node_with_content_freshness(event_node)?);
754
755 let mut source_node = GraphNode::new(source_handle.clone(), "source_handle", label.clone())
756 .with_property("handle", source_handle.clone())
757 .with_property("ref_id", event.source_ref.clone())
758 .with_property("source_ref", event.source_ref.clone())
759 .with_property("provider", "tsift-memory")
760 .with_property("memory_kind", event.kind.as_str())
761 .with_property("imported_from", imported_from)
762 .with_property("text_preview", truncate_for_compact(&event.text, 240))
763 .with_property("token_estimate", event.token_estimate.to_string())
764 .with_property(
765 "expand",
766 format!(
767 "tsift memory status {} --json",
768 shell_quote(root.to_string_lossy().as_ref())
769 ),
770 )
771 .with_provenance(provenance.clone());
772 if let Some(session_id) = &event.session_id {
773 source_node = source_node.with_property("session_id", session_id.clone());
774 }
775 if let Some(observed_at_unix) = event.observed_at_unix {
776 source_node =
777 source_node.with_property("observed_at_unix", observed_at_unix.to_string());
778 }
779 if let Some(imported_id) = &event.imported_id {
780 source_node = source_node.with_property("imported_id", imported_id.clone());
781 }
782 nodes.push(node_with_content_freshness(source_node)?);
783
784 insert_semantic_edge(
785 &mut edge_map,
786 GraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
787 .with_property("label", "tsift-memory source projection")
788 .with_provenance(provenance.clone()),
789 );
790
791 let semantic_input = SemanticProviderInput::from_event(event, label.clone(), imported_from);
792 for (concept_index, concept) in semantic_provider
793 .extract_concepts(&semantic_input)?
794 .into_iter()
795 .enumerate()
796 {
797 let semantic_handle =
798 semantic_concept_handle(&event_key, concept_index, &concept.stable_key);
799 let embedding = semantic_provider.embed(&concept.embedding_text)?;
800 let semantic_node = GraphNode::new(
801 semantic_handle.clone(),
802 "semantic_concept",
803 concept.label.clone(),
804 )
805 .with_property("handle", semantic_handle.clone())
806 .with_property("ref_id", event.source_ref.clone())
807 .with_property("detail", concept.detail.clone())
808 .with_property("source_ref", event.source_ref.clone())
809 .with_property("provider", "tsift-memory")
810 .with_property("memory_kind", event.kind.as_str())
811 .with_property("imported_from", imported_from)
812 .with_property("semantic_provider", provider_metadata.provider_id.clone())
813 .with_property(
814 "semantic_provider_kind",
815 provider_kind_name(&provider_metadata.provider_kind),
816 )
817 .with_property(
818 "semantic_extraction_model",
819 provider_metadata.extraction_model.clone(),
820 )
821 .with_property("semantic_key", concept.stable_key.clone())
822 .with_property("embedding_provider", embedding.provider_id.clone())
823 .with_property("embedding_model", embedding.model.clone())
824 .with_property("embedding_dimensions", embedding.dimensions().to_string())
825 .with_property("embedding", embedding.to_property())
826 .with_property(
827 "expand",
828 semantic_related_command(root, &concept.label, SemanticRelatedKind::Concept),
829 )
830 .with_provenance(provenance.clone());
831 nodes.push(node_with_content_freshness(semantic_node)?);
832
833 insert_semantic_edge(
834 &mut edge_map,
835 GraphEdge::new(
836 source_handle.clone(),
837 semantic_handle.clone(),
838 "mentions_concept",
839 )
840 .with_property("label", "tsift-memory semantic source")
841 .with_property("semantic_provider", provider_metadata.provider_id.clone())
842 .with_provenance(provenance.clone()),
843 );
844 }
845 }
846
847 for edge in edge_map.into_values() {
848 edges.push(edge_with_content_freshness(edge)?);
849 }
850
851 Ok(())
852}
853
854fn memory_event_key(event: &MemoryEvent) -> String {
855 match (event.imported_from.as_deref(), event.imported_id.as_deref()) {
856 (Some(imported_from), Some(imported_id)) => {
857 format!("{imported_from}:{imported_id}")
858 }
859 _ => event.stable_id(),
860 }
861}
862
863fn memory_event_label(event: &MemoryEvent) -> String {
864 let first_line = event
865 .text
866 .lines()
867 .map(str::trim)
868 .find(|line| !line.is_empty())
869 .unwrap_or(event.kind.as_str());
870 match event.kind.as_str() {
871 "imported_observation" => {
872 let observation_type = event
873 .metadata
874 .get("observation_type")
875 .map(String::as_str)
876 .unwrap_or("observation");
877 truncate_for_compact(&format!("{observation_type}: {first_line}"), 80)
878 }
879 "imported_session_summary" => truncate_for_compact(&format!("summary: {first_line}"), 80),
880 "imported_user_prompt" => truncate_for_compact(&format!("prompt: {first_line}"), 80),
881 _ => truncate_for_compact(first_line, 80),
882 }
883}
884
/// Trims `input` and shortens it to at most `max_chars` characters, ending a
/// shortened result with `...`.
///
/// Lengths are counted in `char`s, not bytes. When `max_chars` is below
/// three, a shortened result is just `...`.
fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let trimmed = input.trim();
    // A character at index `max_chars` exists only when the text is too long.
    if trimmed.char_indices().nth(max_chars).is_none() {
        return trimmed.to_string();
    }
    let kept_chars = max_chars.saturating_sub(3);
    let cut = trimmed
        .char_indices()
        .nth(kept_chars)
        .map(|(offset, _)| offset)
        .unwrap_or(trimmed.len());
    format!("{}...", &trimmed[..cut])
}
894
895fn stable_handle(prefix: &str, key: &str) -> String {
896 let mut hasher = blake3::Hasher::new();
897 hasher.update(prefix.as_bytes());
898 hasher.update(&[0]);
899 hasher.update(key.as_bytes());
900 let hex = hasher.finalize().to_hex();
901 format!("{prefix}-{}", &hex[..10])
902}
903
904fn semantic_concept_handle(event_key: &str, index: usize, stable_key: &str) -> String {
905 if index == 0 && stable_key == "primary" {
906 stable_handle("tmemsem", event_key)
907 } else {
908 stable_handle("tmemsem", &format!("{event_key}:{stable_key}"))
909 }
910}
911
912fn provider_kind_name(provider_kind: &ProviderKind) -> &'static str {
913 match provider_kind {
914 ProviderKind::LlamaCpp => "llama.cpp",
915 ProviderKind::Ollama => "ollama",
916 ProviderKind::Vllm => "vllm",
917 ProviderKind::HashFallback => "hash_fallback",
918 }
919}
920
921fn content_hash<T: Serialize>(value: &T) -> Result<String> {
922 let bytes = serde_json::to_vec(value)?;
923 Ok(blake3::hash(&bytes).to_hex().to_string())
924}
925
926fn node_with_content_freshness(mut node: GraphNode) -> Result<GraphNode> {
927 let mut hashable = node.clone();
928 hashable.freshness = None;
929 node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
930 Ok(node)
931}
932
933fn edge_with_content_freshness(mut edge: GraphEdge) -> Result<GraphEdge> {
934 let mut hashable = edge.clone();
935 hashable.freshness = None;
936 edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
937 Ok(edge)
938}
939
/// Kind passed as `--kind` to the `tsift semantic` command built by
/// `semantic_related_command`.
#[derive(Clone, Copy)]
enum SemanticRelatedKind {
    /// Rendered as `concept`.
    Concept,
}
944
945fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
946 match kind {
947 SemanticRelatedKind::Concept => "concept",
948 }
949}
950
951fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
952 format!(
953 "tsift semantic {} --path {} --kind {} --limit 10",
954 shell_quote(query),
955 shell_quote(root.to_string_lossy().as_ref()),
956 semantic_related_kind_name(kind)
957 )
958}
959
960fn semantic_embedding(input: &str) -> Vec<f64> {
961 let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
962 let mut tokens = traversal_tokens(input);
963 if tokens.is_empty() {
964 let trimmed = input.trim().to_ascii_lowercase();
965 if !trimmed.is_empty() {
966 tokens.insert(trimmed);
967 }
968 }
969
970 for token in tokens {
971 let hash = blake3::hash(token.as_bytes());
972 let bytes = hash.as_bytes();
973 let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
974 let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
975 vector[idx] += sign;
976 }
977
978 let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
979 if norm > 0.0 {
980 for value in &mut vector {
981 *value /= norm;
982 }
983 }
984 vector
985}
986
/// Collects the distinct lowercase tokens of `input`.
///
/// Tokens are maximal runs of ASCII letters and digits; every other
/// character, including `_`, `-`, and non-ASCII characters, separates tokens.
/// Runs shorter than three characters are dropped.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for run in input.split(|symbol: char| !symbol.is_ascii_alphanumeric()) {
        if run.len() < 3 {
            continue;
        }
        tokens.insert(run.to_ascii_lowercase());
    }
    tokens
}
996
997fn insert_semantic_edge(
998 edge_map: &mut BTreeMap<(String, String, String), GraphEdge>,
999 edge: GraphEdge,
1000) {
1001 edge_map
1002 .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
1003 .or_insert(edge);
1004}
1005
/// Wraps `s` in double quotes for use in a generated command line.
///
/// One pair of matching surrounding quotes (`"…"` or `'…'`) is removed first,
/// then backslashes and double quotes inside are backslash-escaped.
///
/// A lone quote character is not treated as a quoted pair; slicing it as one
/// previously panicked.
fn shell_quote(s: &str) -> String {
    // A surrounding pair needs at least two characters.
    let is_wrapped_in =
        |quote: char| s.len() >= 2 && s.starts_with(quote) && s.ends_with(quote);
    let unquoted = if is_wrapped_in('"') || is_wrapped_in('\'') {
        &s[1..s.len() - 1]
    } else {
        s
    };

    // NOTE(review): `$` and backticks are left unescaped; inside double quotes
    // a POSIX shell still expands them. Confirm how consumers run these
    // commands before relying on this for untrusted text.
    let mut quoted = String::with_capacity(unquoted.len() + 2);
    quoted.push('"');
    for ch in unquoted.chars() {
        if matches!(ch, '\\' | '"') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
1026
1027#[cfg(test)]
1028mod tests {
1029 use super::*;
1030 use tempfile::TempDir;
1031 use tsift_memory::{MemoryEventKind, MemoryStore, default_memory_db_path};
1032
/// A single event that carries a session id must project to exactly two
/// nodes (one `memory_event`, one `memory_session`) and one edge.
#[test]
fn project_memory_events_links_events_to_sessions() {
    let events = [
        MemoryEvent::new(MemoryEventKind::ResponseSummary, "session.md", "done")
            .with_session_id("session-a"),
    ];
    let projection = project_memory_events(&events);
    let has_kind = |kind: &str| projection.nodes.iter().any(|node| node.kind == kind);

    assert_eq!(projection.nodes.len(), 2);
    assert_eq!(projection.edges.len(), 1);
    assert!(has_kind("memory_session"));
    assert!(has_kind("memory_event"));
}
1053
/// Two events both match the query; with lexical and recency weighted
/// equally, the one observed a minute ago must outrank the one observed
/// thirty days ago.
#[test]
fn rank_memory_events_prefers_recent_relevant_events() {
    let now = 1_700_000_000;
    let config = MemoryDecayConfig {
        half_life_secs: 7.0 * 24.0 * 3600.0,
        lexical_weight: 0.5,
        recency_weight: 0.5,
    };
    let events = [
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "old",
            "graph retrieval design shipped",
        )
        .with_observed_at_unix(now - 30 * 24 * 3600),
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "recent",
            "graph retrieval follow-up",
        )
        .with_observed_at_unix(now - 60),
    ];

    let ranked = rank_memory_events(&events, "graph retrieval", now, config, 10);
    let top = &ranked[0];
    assert_eq!(top.event.source_ref, "recent");
}
1077
/// An event with no observation timestamp that matches the query exactly
/// must still rank above a fresh event whose text is unrelated.
#[test]
fn rank_memory_events_keeps_lexical_hits_without_timestamp() {
    let now = 1_700_000_000;
    let events = [
        // No `with_observed_at_unix`: this event is deliberately untimed.
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "untimed",
            "semantic graph memory",
        ),
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "fresh",
            "unrelated build log output",
        )
        .with_observed_at_unix(now - 10),
    ];

    let ranked = rank_memory_events(
        &events,
        "semantic graph memory",
        now,
        MemoryDecayConfig::default(),
        10,
    );
    assert_eq!(ranked[0].event.source_ref, events[0].source_ref);
}
1102
/// Seeds a memory store with 40 filler events, an older on-topic "needle"
/// and a fresh off-topic "recent" event, then checks that a limit of 2 maps
/// to a candidate limit of 16 and that the two results are the needle and
/// the recent event.
#[test]
fn rank_memory_event_candidates_bounds_db_candidates_before_scoring() {
    let dir = TempDir::new().unwrap();
    let memory_db = default_memory_db_path(dir.path());
    std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
    let store = MemoryStore::open_or_create(&memory_db).unwrap();
    let now = 1_700_000_000;

    // Insertion order is preserved: fillers first, then needle, then recent.
    let mut seeded: Vec<MemoryEvent> = (0..40)
        .map(|index| {
            MemoryEvent::new(
                MemoryEventKind::ResponseSummary,
                format!("old-{index}"),
                format!("ordinary memory event {index}"),
            )
            .with_observed_at_unix(now - 20_000 - index)
        })
        .collect();
    seeded.push(
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "needle",
            "semantic needle graph retrieval",
        )
        .with_observed_at_unix(now - 30_000),
    );
    seeded.push(
        MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "recent",
            "fresh unrelated release note",
        )
        .with_observed_at_unix(now - 10),
    );
    for event in &seeded {
        store.insert_event(event).unwrap();
    }

    assert_eq!(memory_rank_candidate_limit(2), 16);
    let ranked = rank_memory_event_candidates(
        &memory_db,
        "semantic needle",
        now,
        MemoryDecayConfig::default(),
        2,
    )
    .unwrap();
    let returned = |source_ref: &str| {
        ranked
            .iter()
            .any(|scored| scored.event.source_ref == source_ref)
    };

    assert_eq!(ranked.len(), 2);
    assert!(returned("needle"));
    assert!(returned("recent"));
}
1164
/// A plan built for a limit of 5 must carry the default decay config, a
/// candidate limit of 40, an output-contract line about capping candidates,
/// and a follow-up command that mentions `project-graph`.
#[test]
fn plan_memory_query_carries_default_decay_config() {
    let plan = plan_memory_query("graph rag", 5, 1500).unwrap();

    assert_eq!(plan.decay, MemoryDecayConfig::default());
    assert_eq!(plan.candidate_limit, 40);

    let documents_candidate_cap = plan
        .output_contract
        .iter()
        .any(|contract| contract.contains("candidate set capped before ranking"));
    assert!(documents_candidate_cap);

    let suggests_projection = plan
        .next_commands
        .iter()
        .any(|cmd| cmd.contains("project-graph"));
    assert!(suggests_projection);
}
1181
/// Stores a prompt and a response event for one session, projects the memory
/// store into a graph database, and checks both the returned report and the
/// persisted `graph_nodes` rows.
#[test]
fn project_memory_into_graph_persists_memory_nodes() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    let memory_db = default_memory_db_path(root);
    std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
    let store = MemoryStore::open_or_create(&memory_db).unwrap();

    let session = Some(String::from("sess-1"));
    let mut prompt = MemoryEvent::new(
        MemoryEventKind::PromptTarget,
        "session.md",
        "run the gated backlog items",
    );
    prompt.session_id = session.clone();
    prompt.observed_at_unix = Some(1_700_000_000);
    let mut response = MemoryEvent::new(
        MemoryEventKind::ResponseSummary,
        "session.md",
        "decay weighted retrieval shipped",
    );
    response.session_id = session;
    response.observed_at_unix = Some(1_700_000_100);
    for event in [&prompt, &response] {
        store.insert_event(event).unwrap();
    }

    let graph_db = root.join(".tsift").join("graph.db");
    let report = project_memory_into_graph(&memory_db, &graph_db, 100).unwrap();
    assert_eq!(report.events_projected, 2);
    assert!(
        report.nodes_upserted >= 3,
        "two events + one session node, got {}",
        report.nodes_upserted
    );
    assert!(
        report.edges_upserted >= 2,
        "session records each event, got {}",
        report.edges_upserted
    );

    // Read the graph database back directly to confirm what was persisted.
    let conn = rusqlite::Connection::open(&graph_db).unwrap();
    let count = |sql: &str| -> i64 { conn.query_row(sql, [], |row| row.get(0)).unwrap() };
    assert_eq!(
        count("SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_event'"),
        2
    );
    assert_eq!(
        count("SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_session'"),
        1
    );
}
1239
/// With the default provider, appending one memory event must yield a
/// `memory_event` node, a `semantic_concept` node tagged with the hash
/// fallback provider and a 32-dimension embedding, and a `mentions_concept`
/// edge.
#[test]
fn traversal_projection_adds_semantic_memory_rows() {
    let dir = TempDir::new().unwrap();
    let events = [MemoryEvent::new(
        MemoryEventKind::ResponseSummary,
        "session.md",
        "semantic memory graph",
    )
    .with_session_id("sess-1")
    .with_observed_at_unix(1_700_000_000)];
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    append_memory_events_as_traversal_rows(dir.path(), &events, &mut nodes, &mut edges).unwrap();

    assert!(nodes.iter().any(|node| node.kind == "memory_event"));

    let has_hash_backed_concept = nodes.iter().any(|node| {
        let prop = |key: &str| node.properties.get(key).map(String::as_str);
        node.kind == "semantic_concept"
            && prop("provider") == Some("tsift-memory")
            && prop("semantic_provider") == Some(HASH_SEMANTIC_PROVIDER_ID)
            && prop("semantic_provider_kind") == Some("hash_fallback")
            && prop("embedding_model") == Some(SEMANTIC_EMBEDDING_MODEL)
            && prop("embedding_dimensions") == Some("32")
    });
    assert!(has_hash_backed_concept);

    assert!(edges.iter().any(|edge| edge.kind == "mentions_concept"));
}
1269
1270 #[derive(Debug)]
1271 struct FixtureSemanticProvider;
1272
1273 impl SemanticProvider for FixtureSemanticProvider {
1274 fn metadata(&self) -> SemanticProviderMetadata {
1275 SemanticProviderMetadata {
1276 provider_id: "fixture-local-provider".to_string(),
1277 provider_kind: ProviderKind::LlamaCpp,
1278 extraction_model: "fixture-extractor".to_string(),
1279 embedding_model: "fixture-embedder".to_string(),
1280 }
1281 }
1282
1283 fn extract_concepts(
1284 &self,
1285 input: &SemanticProviderInput,
1286 ) -> Result<Vec<SemanticConceptCandidate>> {
1287 assert_eq!(input.source_ref, "session.md");
1288 Ok(vec![SemanticConceptCandidate::new(
1289 "fixture-concept",
1290 "provider extracted concept",
1291 "semantic row from fixture provider",
1292 "provider extracted concept embedding text",
1293 )])
1294 }
1295
1296 fn embed(&self, input: &str) -> Result<SemanticEmbedding> {
1297 assert!(input.contains("embedding text"));
1298 Ok(SemanticEmbedding::new(
1299 "fixture-local-provider",
1300 "fixture-embedder",
1301 vec![1.0, 0.0, -1.0],
1302 ))
1303 }
1304 }
1305
/// When a provider is injected, the resulting `semantic_concept` node and the
/// `mentions_concept` edge must carry that provider's label, identifiers,
/// models, and embedding rather than the defaults.
#[test]
fn traversal_projection_uses_injected_semantic_provider() {
    let dir = TempDir::new().unwrap();
    let events = [MemoryEvent::new(
        MemoryEventKind::ResponseSummary,
        "session.md",
        "semantic provider graph",
    )
    .with_session_id("sess-1")
    .with_observed_at_unix(1_700_000_000)];
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    append_memory_events_as_traversal_rows_with_provider(
        dir.path(),
        &events,
        &mut nodes,
        &mut edges,
        &FixtureSemanticProvider,
    )
    .unwrap();

    let concept = nodes
        .iter()
        .find(|node| node.kind == "semantic_concept")
        .expect("expected semantic concept from fixture provider");
    assert_eq!(concept.label, "provider extracted concept");

    // Checked in the same order as listed; the key is reported on failure.
    let expected_properties = [
        ("semantic_provider", "fixture-local-provider"),
        ("semantic_provider_kind", "llama.cpp"),
        ("semantic_extraction_model", "fixture-extractor"),
        ("embedding_provider", "fixture-local-provider"),
        ("embedding_model", "fixture-embedder"),
        ("embedding_dimensions", "3"),
        ("embedding", "1.000000,0.000000,-1.000000"),
    ];
    for (key, expected) in expected_properties {
        assert_eq!(
            concept.properties.get(key).map(String::as_str),
            Some(expected),
            "property {}",
            key
        );
    }

    let edge_names_provider = edges.iter().any(|edge| {
        edge.kind == "mentions_concept"
            && edge.properties.get("semantic_provider").map(String::as_str)
                == Some("fixture-local-provider")
    });
    assert!(edge_names_provider);
}
1366
1367 struct StubKgExtractor {
1370 payload_json: String,
1371 metadata: tsift_kg::KgExtractorMetadata,
1372 }
1373
1374 impl KgExtractor for StubKgExtractor {
1375 fn metadata(&self) -> tsift_kg::KgExtractorMetadata {
1376 self.metadata.clone()
1377 }
1378 fn extract_json(&self, _chunk: &tsift_kg::KgChunk) -> Result<String> {
1379 Ok(self.payload_json.clone())
1380 }
1381 }
1382
1383 fn stub_extractor() -> StubKgExtractor {
1384 StubKgExtractor {
1385 payload_json: r#"{"entities":[
1386 {"id":"e0","label":"tsift-kg","kind":"crate","description":"KG extraction crate","confidence":0.9},
1387 {"id":"e1","label":"OllamaKgExtractor","kind":"struct","confidence":0.8}
1388 ],"relations":[]}"#
1389 .to_string(),
1390 metadata: tsift_kg::KgExtractorMetadata {
1391 provider_id: "stub-kg-provider".to_string(),
1392 provider_kind: ProviderKind::Ollama,
1393 extraction_model: "stub-model".to_string(),
1394 },
1395 }
1396 }
1397
/// The provider's metadata must mirror the wrapped extractor's id, kind, and
/// extraction model, while the embedding model is the hash provider id.
#[test]
fn kg_semantic_provider_metadata_propagates_extractor_metadata() {
    let SemanticProviderMetadata {
        provider_id,
        provider_kind,
        extraction_model,
        embedding_model,
    } = KgSemanticProvider::with_extractor(stub_extractor()).metadata();

    assert_eq!(provider_id, "stub-kg-provider");
    assert_eq!(provider_kind, ProviderKind::Ollama);
    assert_eq!(extraction_model, "stub-model");
    assert_eq!(embedding_model, HASH_SEMANTIC_PROVIDER_ID);
}
1409
/// The stub payload holds two entities, so extraction must yield two
/// candidates, in payload order, with the expected labels and stable keys.
#[test]
fn kg_semantic_provider_extracts_concepts_via_tsift_kg_pipeline() {
    let input = SemanticProviderInput {
        source_ref: String::from("session.md"),
        memory_kind: String::from("source"),
        label: String::from("kg row"),
        text: String::from("tsift-kg extracts entities via OllamaKgExtractor."),
        semantic_text: String::from("kg row tsift-kg extracts entities"),
        imported_from: String::from("test"),
        session_id: None,
        observed_at_unix: None,
    };
    let candidates = KgSemanticProvider::with_extractor(stub_extractor())
        .extract_concepts(&input)
        .expect("KG pipeline should produce candidates");

    let expected = [("tsift-kg", "kg:crate"), ("OllamaKgExtractor", "kg:struct")];
    assert_eq!(candidates.len(), 2);
    for (candidate, (label, stable_key)) in candidates.iter().zip(expected) {
        assert_eq!(candidate.label, label);
        assert_eq!(candidate.stable_key, stable_key);
    }
}
1432
/// When the extractor returns an empty entity list, extraction must still
/// succeed with a single candidate labelled after the input row and marked
/// as an empty extraction.
#[test]
fn kg_semantic_provider_falls_back_when_extractor_returns_no_entities() {
    let metadata = tsift_kg::KgExtractorMetadata {
        provider_id: String::from("empty-stub"),
        provider_kind: ProviderKind::Ollama,
        extraction_model: String::from("empty-stub-model"),
    };
    let provider = KgSemanticProvider::with_extractor(StubKgExtractor {
        payload_json: String::from(r#"{"entities":[],"relations":[]}"#),
        metadata,
    });
    let input = SemanticProviderInput {
        source_ref: String::from("session.md"),
        memory_kind: String::from("source"),
        label: String::from("ghost row"),
        text: String::from("no entities here"),
        semantic_text: String::from("ghost row no entities here"),
        imported_from: String::from("test"),
        session_id: None,
        observed_at_unix: None,
    };

    let candidates = provider
        .extract_concepts(&input)
        .expect("empty extraction must not fail the cycle");
    assert_eq!(candidates.len(), 1);

    let fallback = &candidates[0];
    assert_eq!(fallback.label, "ghost row");
    assert_eq!(
        fallback.detail,
        "semantic row from tsift-kg (empty extraction)"
    );
}
1461
/// Embeddings produced through the KG provider must be attributed to the
/// hash provider and model and have `SEMANTIC_EMBEDDING_DIM` dimensions.
#[test]
fn kg_semantic_provider_embed_uses_hash_fallback() {
    let kg_provider = KgSemanticProvider::with_extractor(stub_extractor());
    let result = kg_provider.embed("tsift-kg ollama");
    let embedding = result.expect("embed succeeds");

    assert_eq!(embedding.provider_id, HASH_SEMANTIC_PROVIDER_ID);
    assert_eq!(embedding.model, SEMANTIC_EMBEDDING_MODEL);
    assert_eq!(embedding.dimensions(), SEMANTIC_EMBEDDING_DIM);
}
1470
/// Running the traversal projection with the KG provider must produce a
/// `semantic_concept` node labelled after the first stub entity and tagged
/// with the stub extractor's metadata, plus a `mentions_concept` edge.
#[test]
fn kg_semantic_provider_drives_traversal_projection() {
    let dir = TempDir::new().unwrap();
    let events = [MemoryEvent::new(
        MemoryEventKind::ResponseSummary,
        "session.md",
        "kg semantic provider traversal",
    )
    .with_session_id("sess-kg")
    .with_observed_at_unix(1_700_000_000)];
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let provider = KgSemanticProvider::with_extractor(stub_extractor());
    append_memory_events_as_traversal_rows_with_provider(
        dir.path(),
        &events,
        &mut nodes,
        &mut edges,
        &provider,
    )
    .unwrap();

    let concept = nodes
        .iter()
        .find(|node| node.kind == "semantic_concept")
        .expect("expected semantic concept from KG provider");
    assert_eq!(concept.label, "tsift-kg");

    // Checked in the same order as listed; the key is reported on failure.
    let expected_properties = [
        ("semantic_provider", "stub-kg-provider"),
        ("semantic_provider_kind", "ollama"),
        ("semantic_extraction_model", "stub-model"),
    ];
    for (key, expected) in expected_properties {
        assert_eq!(
            concept.properties.get(key).map(String::as_str),
            Some(expected),
            "property {}",
            key
        );
    }

    assert!(edges.iter().any(|edge| edge.kind == "mentions_concept"));
}
1515}