Skip to main content

tsift_memgraphrag/
lib.rs

1use anyhow::{Context, Result, bail};
2use serde::{Deserialize, Serialize};
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::Path;
5use tsift_core::{GraphEdge, GraphFreshness, GraphNode, GraphProjection, GraphProvenance};
6use tsift_memory::{
7    DEFAULT_MEMORY_CANDIDATE_LIMIT, MemoryEvent, MemoryReadPolicy, MemoryReadWatermark,
8    estimate_tokens, memory_read_watermark, read_memory_event_candidates, read_memory_events,
9    read_memory_events_with_policy,
10};
11use tsift_sqlite::SqliteGraphStore;
12
/// Contract version string copied into `MemoryQueryPlan::contract_version`.
13pub const MEMGRAPHRAG_CONTRACT_VERSION: &str = "tsift-memgraphrag-v1";
/// Model name recorded in the `embedding_model` property of semantic concept nodes.
14pub const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
15
// Number of components in the vector returned by `semantic_embedding`.
16const SEMANTIC_EMBEDDING_DIM: usize = 32;
// Event limit used by `append_tsift_memory_graph_projection_rows` when the caller gives none.
17const DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT: usize = 600;
// Factor by which `memory_rank_candidate_limit` widens the requested result limit.
18const MEMORY_RANK_CANDIDATE_MULTIPLIER: usize = 8;
// Fixed id (and `handle` property) of the metadata node added by `append_memory_projection_metadata`.
19const MEMORY_PROJECTION_NODE_ID: &str = "memory_projection:tsift-memory";
20
/// Returns the list of graph node kind names associated with memory projections.
///
/// The order of the returned names is fixed.
pub fn memory_graph_node_kinds() -> Vec<&'static str> {
    const KINDS: [&str; 7] = [
        "memory_session",
        "memory_event",
        "session",
        "source_handle",
        "semantic_concept",
        "semantic_vector_handle",
        "memory_projection",
    ];
    KINDS.to_vec()
}
32
33pub fn project_memory_events(events: &[MemoryEvent]) -> GraphProjection {
34    let mut projection = GraphProjection::default();
35    let mut sessions = BTreeSet::new();
36
37    for event in events {
38        let event_id = event.stable_id();
39        if let Some(session_id) = &event.session_id
40            && sessions.insert(session_id.clone())
41        {
42            projection.nodes.push(
43                GraphNode::new(
44                    format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex()),
45                    "memory_session",
46                    session_id,
47                )
48                .with_property("session_id", session_id)
49                .with_provenance(GraphProvenance::new("tsift-memory", session_id)),
50            );
51        }
52
53        let mut node = GraphNode::new(&event_id, "memory_event", event.kind.as_str())
54            .with_property("event_kind", event.kind.as_str())
55            .with_property("source_ref", &event.source_ref)
56            .with_property("token_estimate", event.token_estimate.to_string())
57            .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref));
58        if let Some(imported_from) = &event.imported_from {
59            node = node.with_property("imported_from", imported_from);
60        }
61        if let Some(imported_id) = &event.imported_id {
62            node = node.with_property("imported_id", imported_id);
63        }
64        projection.nodes.push(node);
65
66        if let Some(session_id) = &event.session_id {
67            let session_node_id =
68                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
69            projection.edges.push(
70                GraphEdge::new(session_node_id, event_id, "records_memory_event")
71                    .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref)),
72            );
73        }
74    }
75
76    projection
77}
78
/// Tuning knobs for blending lexical relevance with recency when ranking memory events.
79#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
80pub struct MemoryDecayConfig {
    /// Age, in seconds, at which an event's recency score halves
    /// (`memory_recency_decay` clamps this to at least 1.0).
81    pub half_life_secs: f64,
    /// Multiplier applied to the lexical score in the blended score.
82    pub lexical_weight: f64,
    /// Multiplier applied to the recency score in the blended score.
83    pub recency_weight: f64,
84}
85
86impl Default for MemoryDecayConfig {
87    fn default() -> Self {
88        Self {
89            half_life_secs: 7.0 * 24.0 * 3600.0,
90            lexical_weight: 0.6,
91            recency_weight: 0.4,
92        }
93    }
94}
95
/// A memory event together with the scores `rank_memory_events` computed for it.
96#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
97pub struct ScoredMemoryEvent {
    /// Copy of the ranked event.
98    pub event: MemoryEvent,
    /// Fraction of query terms found in the event text.
99    pub lexical_score: f64,
    /// Half-life decay of the event's age; 0.0 when the event has no timestamp.
100    pub recency_score: f64,
    /// Weighted sum of `lexical_score` and `recency_score`.
101    pub score: f64,
102}
103
/// Splits a query into lowercase terms.
///
/// Any non-alphanumeric character acts as a separator; empty pieces are
/// dropped. Duplicates are kept and the original order is preserved.
fn memory_query_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    for piece in query.split(|c: char| !c.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        terms.push(piece.to_lowercase());
    }
    terms
}
111
/// Returns the fraction of `terms` that occur as substrings of `text`.
///
/// The text is lowercased before matching; terms are compared as given.
/// Returns 0.0 when `terms` is empty.
fn memory_lexical_overlap(terms: &[String], text: &str) -> f64 {
    let total = terms.len();
    if total == 0 {
        return 0.0;
    }

    let lowered = text.to_lowercase();
    let mut matched = 0usize;
    for term in terms {
        if lowered.contains(term.as_str()) {
            matched += 1;
        }
    }
    matched as f64 / total as f64
}
123
/// Computes an exponential half-life decay score for an event's age.
///
/// Returns 0.0 when the event has no timestamp. Otherwise the age is
/// `now_unix - observed` seconds, with negative ages (timestamps in the
/// future) treated as zero, and the score is `0.5 ^ (age / half_life)`.
/// `half_life_secs` is clamped to at least 1.0.
fn memory_recency_decay(observed_at_unix: Option<i64>, now_unix: i64, half_life_secs: f64) -> f64 {
    let Some(observed) = observed_at_unix else {
        return 0.0;
    };
    // Saturating subtraction: extreme timestamps must not overflow `i64`
    // (a plain `-` panics in debug builds and wraps in release builds).
    let age_secs = now_unix.saturating_sub(observed).max(0) as f64;
    0.5f64.powf(age_secs / half_life_secs.max(1.0))
}
133
134pub fn rank_memory_events(
135    events: &[MemoryEvent],
136    query: &str,
137    now_unix: i64,
138    config: MemoryDecayConfig,
139    limit: usize,
140) -> Vec<ScoredMemoryEvent> {
141    let terms = memory_query_terms(query);
142    let mut scored: Vec<ScoredMemoryEvent> = events
143        .iter()
144        .map(|event| {
145            let lexical_score = memory_lexical_overlap(&terms, &event.text);
146            let recency_score =
147                memory_recency_decay(event.observed_at_unix, now_unix, config.half_life_secs);
148            let score =
149                config.lexical_weight * lexical_score + config.recency_weight * recency_score;
150            ScoredMemoryEvent {
151                event: event.clone(),
152                lexical_score,
153                recency_score,
154                score,
155            }
156        })
157        .collect();
158    scored.sort_by(|a, b| {
159        b.score
160            .partial_cmp(&a.score)
161            .unwrap_or(std::cmp::Ordering::Equal)
162            .then_with(|| {
163                b.recency_score
164                    .partial_cmp(&a.recency_score)
165                    .unwrap_or(std::cmp::Ordering::Equal)
166            })
167    });
168    scored.truncate(limit);
169    scored
170}
171
172pub fn memory_rank_candidate_limit(limit: usize) -> usize {
173    if limit == 0 {
174        return 0;
175    }
176    limit
177        .saturating_mul(MEMORY_RANK_CANDIDATE_MULTIPLIER)
178        .min(DEFAULT_MEMORY_CANDIDATE_LIMIT.max(limit))
179}
180
181pub fn rank_memory_event_candidates(
182    memory_db: &Path,
183    query: &str,
184    now_unix: i64,
185    config: MemoryDecayConfig,
186    limit: usize,
187) -> Result<Vec<ScoredMemoryEvent>> {
188    let candidate_limit = memory_rank_candidate_limit(limit);
189    let candidates = read_memory_event_candidates(memory_db, query, candidate_limit)?;
190    Ok(rank_memory_events(
191        &candidates,
192        query,
193        now_unix,
194        config,
195        limit,
196    ))
197}
198
/// Description of how a memory query will be executed, as built by `plan_memory_query`.
199#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
200pub struct MemoryQueryPlan {
    /// Set to `MEMGRAPHRAG_CONTRACT_VERSION`.
201    pub contract_version: String,
    /// The query text as supplied by the caller.
202    pub query: String,
    /// Requested number of results.
203    pub limit: usize,
    /// Candidate count derived from `limit` via `memory_rank_candidate_limit`.
204    pub candidate_limit: usize,
    /// Token budget as supplied by the caller.
205    pub max_tokens: usize,
    /// Result of `estimate_tokens` on the query text.
206    pub estimated_query_tokens: usize,
    /// Decay configuration; `plan_memory_query` fills in the default.
207    pub decay: MemoryDecayConfig,
    /// Human-readable statements describing what the query output contains.
208    pub output_contract: Vec<String>,
    /// Suggested follow-up `tsift` command lines.
209    pub next_commands: Vec<String>,
210}
211
212pub fn plan_memory_query(query: &str, limit: usize, max_tokens: usize) -> Result<MemoryQueryPlan> {
213    if query.trim().is_empty() {
214        bail!("memory query must not be empty");
215    }
216    Ok(MemoryQueryPlan {
217        contract_version: MEMGRAPHRAG_CONTRACT_VERSION.to_string(),
218        query: query.to_string(),
219        limit,
220        candidate_limit: memory_rank_candidate_limit(limit),
221        max_tokens,
222        estimated_query_tokens: estimate_tokens(query),
223        decay: MemoryDecayConfig::default(),
224        output_contract: vec![
225            "indexed FTS/recent candidate set capped before ranking".to_string(),
226            "decay-weighted ranked memory_event ids (lexical + recency)".to_string(),
227            "per-event lexical_score, recency_score, and blended score".to_string(),
228            "source_ref handles for expansion".to_string(),
229            "graph node ids for neighborhood projection".to_string(),
230            "token estimates for every returned packet".to_string(),
231        ],
232        next_commands: vec![
233            "tsift memory status . --json".to_string(),
234            "tsift memory project-graph . --json".to_string(),
235            "tsift graph-db --path . --json related '<query>'".to_string(),
236        ],
237    })
238}
239
/// Summary returned after projecting memory events into the graph store.
240#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
241pub struct MemoryGraphProjectReport {
    /// Number of memory events that were read and projected.
242    pub events_projected: usize,
    /// Number of nodes in the projection handed to the store.
243    pub nodes_upserted: usize,
    /// Number of edges in the projection handed to the store.
244    pub edges_upserted: usize,
    /// Copy of the read policy used to select events.
245    pub read_policy: MemoryReadPolicy,
    /// `source_watermark` taken from the computed read watermark.
246    pub source_watermark: String,
    /// `content_hash` taken from the computed read watermark.
247    pub content_hash: String,
    /// `events_available` taken from the computed read watermark.
248    pub events_available: usize,
249}
250
251pub fn project_memory_into_graph(
252    memory_db: &Path,
253    graph_db: &Path,
254    limit: usize,
255) -> Result<MemoryGraphProjectReport> {
256    project_memory_into_graph_with_policy(memory_db, graph_db, limit, &MemoryReadPolicy::default())
257}
258
259pub fn project_memory_into_graph_with_policy(
260    memory_db: &Path,
261    graph_db: &Path,
262    limit: usize,
263    read_policy: &MemoryReadPolicy,
264) -> Result<MemoryGraphProjectReport> {
265    let events = read_memory_events_with_policy(memory_db, read_policy, limit)?;
266    let watermark = memory_read_watermark(memory_db, read_policy, limit, &events)?;
267    let mut projection = project_memory_events(&events);
268    append_memory_projection_metadata(&mut projection, &watermark)?;
269    let nodes_upserted = projection.nodes.len();
270    let edges_upserted = projection.edges.len();
271    if let Some(parent) = graph_db.parent() {
272        std::fs::create_dir_all(parent)
273            .with_context(|| format!("create graph db dir {}", parent.display()))?;
274    }
275    let mut store = SqliteGraphStore::open(graph_db)
276        .with_context(|| format!("open graph store {}", graph_db.display()))?;
277    store.upsert_projection(&projection)?;
278    Ok(MemoryGraphProjectReport {
279        events_projected: events.len(),
280        nodes_upserted,
281        edges_upserted,
282        read_policy: read_policy.clone(),
283        source_watermark: watermark.source_watermark,
284        content_hash: watermark.content_hash,
285        events_available: watermark.events_available,
286    })
287}
288
289fn append_memory_projection_metadata(
290    projection: &mut GraphProjection,
291    watermark: &MemoryReadWatermark,
292) -> Result<()> {
293    let mut node = GraphNode::new(
294        MEMORY_PROJECTION_NODE_ID,
295        "memory_projection",
296        "tsift-memory graph projection",
297    )
298    .with_property("handle", MEMORY_PROJECTION_NODE_ID)
299    .with_property("ref_id", "tsift-memory")
300    .with_property("provider", "tsift-memory")
301    .with_property("read_policy", watermark.policy.order.as_str())
302    .with_property("limit", watermark.limit.to_string())
303    .with_property("events_read", watermark.events_read.to_string())
304    .with_property("events_available", watermark.events_available.to_string())
305    .with_property("source_watermark", watermark.source_watermark.clone())
306    .with_property("content_hash", watermark.content_hash.clone())
307    .with_provenance(
308        GraphProvenance::new("tsift-memory", "memory_events")
309            .with_content_hash(watermark.content_hash.clone()),
310    )
311    .with_freshness(GraphFreshness::content_hash(
312        watermark.source_watermark.clone(),
313    ));
314    if let Some(query) = &watermark.policy.query {
315        node = node.with_property("query", query.clone());
316    }
317    if let Some(max_rowid) = watermark.max_rowid {
318        node = node.with_property("max_rowid", max_rowid.to_string());
319    }
320    if let Some(max_observed_at_unix) = watermark.max_observed_at_unix {
321        node = node.with_property("max_observed_at_unix", max_observed_at_unix.to_string());
322    }
323    if let Some(max_created_at_unix) = watermark.max_created_at_unix {
324        node = node.with_property("max_created_at_unix", max_created_at_unix.to_string());
325    }
326    projection.nodes.push(node_with_content_freshness(node)?);
327    Ok(())
328}
329
/// Summary returned by `derive_memory_ontology_graph`.
330#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
331pub struct MemoryOntologyGraphReport {
    /// Number of nodes in the derived ontology projection.
332    pub type_nodes: usize,
    /// Number of edges in the derived ontology projection.
333    pub relations: usize,
334}
335
336pub fn derive_memory_ontology_graph(graph_db: &Path) -> Result<MemoryOntologyGraphReport> {
337    if !graph_db.exists() {
338        bail!(
339            "graph store {} does not exist; run `tsift graph-db refresh` or `tsift memory project-graph` first",
340            graph_db.display()
341        );
342    }
343    let mut store = SqliteGraphStore::open(graph_db)
344        .with_context(|| format!("open graph store {}", graph_db.display()))?;
345    let ontology = store.derive_ontology()?;
346    let type_nodes = ontology.nodes.len();
347    let relations = ontology.edges.len();
348    store.upsert_projection(&ontology)?;
349    Ok(MemoryOntologyGraphReport {
350        type_nodes,
351        relations,
352    })
353}
354
355pub fn append_tsift_memory_graph_projection_rows(
356    root: &Path,
357    nodes: &mut Vec<GraphNode>,
358    edges: &mut Vec<GraphEdge>,
359) -> Result<()> {
360    append_tsift_memory_graph_projection_rows_with_limit(
361        root,
362        nodes,
363        edges,
364        DEFAULT_TRAVERSAL_MEMORY_EVENT_LIMIT,
365    )
366}
367
368pub fn append_tsift_memory_graph_projection_rows_with_limit(
369    root: &Path,
370    nodes: &mut Vec<GraphNode>,
371    edges: &mut Vec<GraphEdge>,
372    event_limit: usize,
373) -> Result<()> {
374    let memory_db = tsift_memory::default_memory_db_path(root);
375    if !memory_db.exists() {
376        return Ok(());
377    }
378    let events = match read_memory_events(&memory_db, event_limit) {
379        Ok(events) => events,
380        Err(_) => return Ok(()),
381    };
382    append_memory_events_as_traversal_rows(root, &events, nodes, edges)
383}
384
385pub fn append_memory_events_as_traversal_rows(
386    root: &Path,
387    events: &[MemoryEvent],
388    nodes: &mut Vec<GraphNode>,
389    edges: &mut Vec<GraphEdge>,
390) -> Result<()> {
391    if events.is_empty() {
392        return Ok(());
393    }
394
395    let mut seen_sessions = BTreeSet::new();
396    let mut edge_map = BTreeMap::<(String, String, String), GraphEdge>::new();
397
398    for event in events {
399        let event_id = event.stable_id();
400        let event_key = memory_event_key(event);
401        let source_handle = stable_handle("tmemsrc", &event_key);
402        let semantic_handle = stable_handle("tmemsem", &event_key);
403        let provenance = GraphProvenance::new("tsift-memory", &event.source_ref);
404        let imported_from = event.imported_from.as_deref().unwrap_or("native");
405
406        if let Some(session_id) = &event.session_id {
407            let session_handle =
408                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
409            if seen_sessions.insert(session_id.clone()) {
410                let session_node = GraphNode::new(
411                    session_handle.clone(),
412                    "memory_session",
413                    truncate_for_compact(session_id, 80),
414                )
415                .with_property("handle", session_handle.clone())
416                .with_property("ref_id", session_id.clone())
417                .with_property("session_id", session_id.clone())
418                .with_property("provider", "tsift-memory")
419                .with_property(
420                    "expand",
421                    format!(
422                        "tsift memory status {} --json",
423                        shell_quote(root.to_string_lossy().as_ref())
424                    ),
425                )
426                .with_provenance(provenance.clone());
427                nodes.push(node_with_content_freshness(session_node)?);
428            }
429
430            insert_semantic_edge(
431                &mut edge_map,
432                GraphEdge::new(
433                    session_handle.clone(),
434                    event_id.clone(),
435                    "records_memory_event",
436                )
437                .with_property("label", "tsift-memory session event")
438                .with_provenance(provenance.clone()),
439            );
440            insert_semantic_edge(
441                &mut edge_map,
442                GraphEdge::new(
443                    session_handle,
444                    source_handle.clone(),
445                    "records_memory_source",
446                )
447                .with_property("label", "tsift-memory session source")
448                .with_provenance(provenance.clone()),
449            );
450        }
451
452        let label = memory_event_label(event);
453        let mut event_node = GraphNode::new(event_id.clone(), "memory_event", event.kind.as_str())
454            .with_property("handle", event_id.clone())
455            .with_property("ref_id", event.source_ref.clone())
456            .with_property("source_ref", event.source_ref.clone())
457            .with_property("provider", "tsift-memory")
458            .with_property("memory_kind", event.kind.as_str())
459            .with_property("imported_from", imported_from)
460            .with_property("text_preview", truncate_for_compact(&event.text, 240))
461            .with_property("token_estimate", event.token_estimate.to_string())
462            .with_property(
463                "expand",
464                format!(
465                    "tsift memory status {} --json",
466                    shell_quote(root.to_string_lossy().as_ref())
467                ),
468            )
469            .with_provenance(provenance.clone());
470        if let Some(session_id) = &event.session_id {
471            event_node = event_node.with_property("session_id", session_id.clone());
472        }
473        if let Some(observed_at_unix) = event.observed_at_unix {
474            event_node = event_node.with_property("observed_at_unix", observed_at_unix.to_string());
475        }
476        if let Some(imported_id) = &event.imported_id {
477            event_node = event_node.with_property("imported_id", imported_id.clone());
478        }
479        nodes.push(node_with_content_freshness(event_node)?);
480
481        let mut source_node = GraphNode::new(source_handle.clone(), "source_handle", label.clone())
482            .with_property("handle", source_handle.clone())
483            .with_property("ref_id", event.source_ref.clone())
484            .with_property("source_ref", event.source_ref.clone())
485            .with_property("provider", "tsift-memory")
486            .with_property("memory_kind", event.kind.as_str())
487            .with_property("imported_from", imported_from)
488            .with_property("text_preview", truncate_for_compact(&event.text, 240))
489            .with_property("token_estimate", event.token_estimate.to_string())
490            .with_property(
491                "expand",
492                format!(
493                    "tsift memory status {} --json",
494                    shell_quote(root.to_string_lossy().as_ref())
495                ),
496            )
497            .with_provenance(provenance.clone());
498        if let Some(session_id) = &event.session_id {
499            source_node = source_node.with_property("session_id", session_id.clone());
500        }
501        if let Some(observed_at_unix) = event.observed_at_unix {
502            source_node =
503                source_node.with_property("observed_at_unix", observed_at_unix.to_string());
504        }
505        if let Some(imported_id) = &event.imported_id {
506            source_node = source_node.with_property("imported_id", imported_id.clone());
507        }
508        nodes.push(node_with_content_freshness(source_node)?);
509
510        insert_semantic_edge(
511            &mut edge_map,
512            GraphEdge::new(event_id.clone(), source_handle.clone(), "projects_source")
513                .with_property("label", "tsift-memory source projection")
514                .with_provenance(provenance.clone()),
515        );
516
517        let semantic_text = format!("{} {}", label, event.text);
518        let semantic_node =
519            GraphNode::new(semantic_handle.clone(), "semantic_concept", label.clone())
520                .with_property("handle", semantic_handle.clone())
521                .with_property("ref_id", event.source_ref.clone())
522                .with_property("detail", "semantic row from tsift-memory")
523                .with_property("source_ref", event.source_ref.clone())
524                .with_property("provider", "tsift-memory")
525                .with_property("memory_kind", event.kind.as_str())
526                .with_property("imported_from", imported_from)
527                .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
528                .with_property("embedding", semantic_embedding_property(&semantic_text))
529                .with_property(
530                    "expand",
531                    semantic_related_command(root, &label, SemanticRelatedKind::Concept),
532                )
533                .with_provenance(provenance.clone());
534        nodes.push(node_with_content_freshness(semantic_node)?);
535
536        insert_semantic_edge(
537            &mut edge_map,
538            GraphEdge::new(
539                source_handle.clone(),
540                semantic_handle.clone(),
541                "mentions_concept",
542            )
543            .with_property("label", "tsift-memory semantic source")
544            .with_provenance(provenance.clone()),
545        );
546    }
547
548    for edge in edge_map.into_values() {
549        edges.push(edge_with_content_freshness(edge)?);
550    }
551
552    Ok(())
553}
554
555fn memory_event_key(event: &MemoryEvent) -> String {
556    match (event.imported_from.as_deref(), event.imported_id.as_deref()) {
557        (Some(imported_from), Some(imported_id)) => {
558            format!("{imported_from}:{imported_id}")
559        }
560        _ => event.stable_id(),
561    }
562}
563
564fn memory_event_label(event: &MemoryEvent) -> String {
565    let first_line = event
566        .text
567        .lines()
568        .map(str::trim)
569        .find(|line| !line.is_empty())
570        .unwrap_or(event.kind.as_str());
571    match event.kind.as_str() {
572        "imported_observation" => {
573            let observation_type = event
574                .metadata
575                .get("observation_type")
576                .map(String::as_str)
577                .unwrap_or("observation");
578            truncate_for_compact(&format!("{observation_type}: {first_line}"), 80)
579        }
580        "imported_session_summary" => truncate_for_compact(&format!("summary: {first_line}"), 80),
581        "imported_user_prompt" => truncate_for_compact(&format!("prompt: {first_line}"), 80),
582        _ => truncate_for_compact(first_line, 80),
583    }
584}
585
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that already fits is returned trimmed and otherwise unchanged.
/// Longer text is cut and ends in `"..."`, with the ellipsis counted toward
/// the limit. When `max_chars` is below 3 there is no room for the ellipsis,
/// so the text is cut to `max_chars` characters, keeping the result within
/// the limit.
///
/// Lengths are measured in `char`s, so multi-byte text is never split inside
/// a character.
fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    let trimmed = input.trim();
    // The text fits when there is no character at index `max_chars`; this
    // stops scanning early instead of counting the whole string.
    if trimmed.chars().nth(max_chars).is_none() {
        return trimmed.to_string();
    }

    if max_chars < ELLIPSIS.len() {
        return trimmed.chars().take(max_chars).collect();
    }

    let mut shortened: String = trimmed.chars().take(max_chars - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
595
596fn stable_handle(prefix: &str, key: &str) -> String {
597    let mut hasher = blake3::Hasher::new();
598    hasher.update(prefix.as_bytes());
599    hasher.update(&[0]);
600    hasher.update(key.as_bytes());
601    let hex = hasher.finalize().to_hex();
602    format!("{prefix}-{}", &hex[..10])
603}
604
605fn content_hash<T: Serialize>(value: &T) -> Result<String> {
606    let bytes = serde_json::to_vec(value)?;
607    Ok(blake3::hash(&bytes).to_hex().to_string())
608}
609
610fn node_with_content_freshness(mut node: GraphNode) -> Result<GraphNode> {
611    let mut hashable = node.clone();
612    hashable.freshness = None;
613    node.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
614    Ok(node)
615}
616
617fn edge_with_content_freshness(mut edge: GraphEdge) -> Result<GraphEdge> {
618    let mut hashable = edge.clone();
619    hashable.freshness = None;
620    edge.freshness = Some(GraphFreshness::content_hash(content_hash(&hashable)?));
621    Ok(edge)
622}
623
/// Kind passed as the `--kind` argument of the command built by `semantic_related_command`.
624#[derive(Clone, Copy)]
625enum SemanticRelatedKind {
    /// Rendered as `"concept"` by `semantic_related_kind_name`.
626    Concept,
627}
628
/// Returns the command-line spelling of a `SemanticRelatedKind`.
629fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
    // No wildcard arm: adding a variant forces this mapping to be updated.
630    match kind {
631        SemanticRelatedKind::Concept => "concept",
632    }
633}
634
635fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
636    format!(
637        "tsift semantic {} --path {} --kind {} --limit 10",
638        shell_quote(query),
639        shell_quote(root.to_string_lossy().as_ref()),
640        semantic_related_kind_name(kind)
641    )
642}
643
644fn semantic_embedding(input: &str) -> Vec<f64> {
645    let mut vector = vec![0.0; SEMANTIC_EMBEDDING_DIM];
646    let mut tokens = traversal_tokens(input);
647    if tokens.is_empty() {
648        let trimmed = input.trim().to_ascii_lowercase();
649        if !trimmed.is_empty() {
650            tokens.insert(trimmed);
651        }
652    }
653
654    for token in tokens {
655        let hash = blake3::hash(token.as_bytes());
656        let bytes = hash.as_bytes();
657        let idx = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
658        let sign = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
659        vector[idx] += sign;
660    }
661
662    let norm = vector.iter().map(|value| value * value).sum::<f64>().sqrt();
663    if norm > 0.0 {
664        for value in &mut vector {
665            *value /= norm;
666        }
667    }
668    vector
669}
670
671fn semantic_embedding_property(input: &str) -> String {
672    semantic_embedding(input)
673        .iter()
674        .map(|value| format!("{value:.6}"))
675        .collect::<Vec<_>>()
676        .join(",")
677}
678
/// Extracts the set of lowercase ASCII tokens from `input`.
///
/// A token is a maximal run of ASCII letters and digits; every other
/// character (including `_`, `-`, and all non-ASCII text) separates tokens.
/// Runs shorter than three characters are discarded.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for run in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if run.len() >= 3 {
            tokens.insert(run.to_ascii_lowercase());
        }
    }
    tokens
}
688
689fn insert_semantic_edge(
690    edge_map: &mut BTreeMap<(String, String, String), GraphEdge>,
691    edge: GraphEdge,
692) {
693    edge_map
694        .entry((edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone()))
695        .or_insert(edge);
696}
697
/// Wraps `s` in double quotes for embedding in a shell command line.
///
/// One layer of matching outer quotes (`"…"` or `'…'`) is removed first. The
/// content is then escaped with a backslash before each character that stays
/// special inside double quotes: `\`, `"`, `$`, and `` ` ``. Escaping `$` and
/// the backtick stops quoted text from triggering variable or command
/// substitution when the command is run.
///
/// A lone quote character is not treated as a quoted empty string; it is
/// escaped like any other content.
fn shell_quote(s: &str) -> String {
    // `strip_prefix` followed by `strip_suffix` only succeeds when there are
    // two distinct quote characters, so a one-character input is left alone.
    let unquoted = ['"', '\'']
        .iter()
        .find_map(|&quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s);

    let mut quoted = String::with_capacity(unquoted.len() + 2);
    quoted.push('"');
    for ch in unquoted.chars() {
        if matches!(ch, '\\' | '"' | '$' | '`') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
718
719#[cfg(test)]
720mod tests {
721    use super::*;
722    use tempfile::TempDir;
723    use tsift_memory::{MemoryEventKind, MemoryStore, default_memory_db_path};
724
725    #[test]
726    fn project_memory_events_links_events_to_sessions() {
727        let event = MemoryEvent::new(MemoryEventKind::ResponseSummary, "session.md", "done")
728            .with_session_id("session-a");
729        let projection = project_memory_events(&[event]);
730        assert_eq!(projection.nodes.len(), 2);
731        assert_eq!(projection.edges.len(), 1);
732        assert!(
733            projection
734                .nodes
735                .iter()
736                .any(|node| node.kind == "memory_session")
737        );
738        assert!(
739            projection
740                .nodes
741                .iter()
742                .any(|node| node.kind == "memory_event")
743        );
744    }
745
746    #[test]
747    fn rank_memory_events_prefers_recent_relevant_events() {
748        let now = 1_700_000_000;
749        let old = MemoryEvent::new(
750            MemoryEventKind::ResponseSummary,
751            "old",
752            "graph retrieval design shipped",
753        )
754        .with_observed_at_unix(now - 30 * 24 * 3600);
755        let recent = MemoryEvent::new(
756            MemoryEventKind::ResponseSummary,
757            "recent",
758            "graph retrieval follow-up",
759        )
760        .with_observed_at_unix(now - 60);
761        let config = MemoryDecayConfig {
762            half_life_secs: 7.0 * 24.0 * 3600.0,
763            lexical_weight: 0.5,
764            recency_weight: 0.5,
765        };
766        let ranked = rank_memory_events(&[old, recent], "graph retrieval", now, config, 10);
767        assert_eq!(ranked[0].event.source_ref, "recent");
768    }
769
770    #[test]
771    fn rank_memory_events_keeps_lexical_hits_without_timestamp() {
772        let now = 1_700_000_000;
773        let event = MemoryEvent::new(
774            MemoryEventKind::ResponseSummary,
775            "untimed",
776            "semantic graph memory",
777        );
778        let off_topic_fresh = MemoryEvent::new(
779            MemoryEventKind::ResponseSummary,
780            "fresh",
781            "unrelated build log output",
782        )
783        .with_observed_at_unix(now - 10);
784        let config = MemoryDecayConfig::default();
785        let ranked = rank_memory_events(
786            &[event.clone(), off_topic_fresh],
787            "semantic graph memory",
788            now,
789            config,
790            10,
791        );
792        assert_eq!(ranked[0].event.source_ref, event.source_ref);
793    }
794
795    #[test]
796    fn rank_memory_event_candidates_bounds_db_candidates_before_scoring() {
797        let dir = TempDir::new().unwrap();
798        let memory_db = default_memory_db_path(dir.path());
799        std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
800        let store = MemoryStore::open_or_create(&memory_db).unwrap();
801        let now = 1_700_000_000;
802        for index in 0..40 {
803            store
804                .insert_event(
805                    &MemoryEvent::new(
806                        MemoryEventKind::ResponseSummary,
807                        format!("old-{index}"),
808                        format!("ordinary memory event {index}"),
809                    )
810                    .with_observed_at_unix(now - 20_000 - index),
811                )
812                .unwrap();
813        }
814        store
815            .insert_event(
816                &MemoryEvent::new(
817                    MemoryEventKind::ResponseSummary,
818                    "needle",
819                    "semantic needle graph retrieval",
820                )
821                .with_observed_at_unix(now - 30_000),
822            )
823            .unwrap();
824        store
825            .insert_event(
826                &MemoryEvent::new(
827                    MemoryEventKind::ResponseSummary,
828                    "recent",
829                    "fresh unrelated release note",
830                )
831                .with_observed_at_unix(now - 10),
832            )
833            .unwrap();
834
835        assert_eq!(memory_rank_candidate_limit(2), 16);
836        let ranked = rank_memory_event_candidates(
837            &memory_db,
838            "semantic needle",
839            now,
840            MemoryDecayConfig::default(),
841            2,
842        )
843        .unwrap();
844        assert_eq!(ranked.len(), 2);
845        assert!(
846            ranked
847                .iter()
848                .any(|scored| scored.event.source_ref == "needle")
849        );
850        assert!(
851            ranked
852                .iter()
853                .any(|scored| scored.event.source_ref == "recent")
854        );
855    }
856
857    #[test]
858    fn plan_memory_query_carries_default_decay_config() {
859        let plan = plan_memory_query("graph rag", 5, 1500).unwrap();
860        assert_eq!(plan.decay, MemoryDecayConfig::default());
861        assert_eq!(plan.candidate_limit, 40);
862        assert!(
863            plan.output_contract
864                .iter()
865                .any(|contract| contract.contains("candidate set capped before ranking"))
866        );
867        assert!(
868            plan.next_commands
869                .iter()
870                .any(|cmd| cmd.contains("project-graph"))
871        );
872    }
873
874    #[test]
875    fn project_memory_into_graph_persists_memory_nodes() {
876        let dir = TempDir::new().unwrap();
877        let root = dir.path();
878        let memory_db = default_memory_db_path(root);
879        std::fs::create_dir_all(memory_db.parent().unwrap()).unwrap();
880
881        let store = MemoryStore::open_or_create(&memory_db).unwrap();
882        let mut prompt = MemoryEvent::new(
883            MemoryEventKind::PromptTarget,
884            "session.md",
885            "run the gated backlog items",
886        );
887        prompt.session_id = Some("sess-1".to_string());
888        prompt.observed_at_unix = Some(1_700_000_000);
889        let mut response = MemoryEvent::new(
890            MemoryEventKind::ResponseSummary,
891            "session.md",
892            "decay weighted retrieval shipped",
893        );
894        response.session_id = Some("sess-1".to_string());
895        response.observed_at_unix = Some(1_700_000_100);
896        store.insert_event(&prompt).unwrap();
897        store.insert_event(&response).unwrap();
898
899        let graph_db = root.join(".tsift").join("graph.db");
900        let report = project_memory_into_graph(&memory_db, &graph_db, 100).unwrap();
901        assert_eq!(report.events_projected, 2);
902        assert!(
903            report.nodes_upserted >= 3,
904            "two events + one session node, got {}",
905            report.nodes_upserted
906        );
907        assert!(
908            report.edges_upserted >= 2,
909            "session records each event, got {}",
910            report.edges_upserted
911        );
912
913        let conn = rusqlite::Connection::open(&graph_db).unwrap();
914        let memory_events: i64 = conn
915            .query_row(
916                "SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_event'",
917                [],
918                |row| row.get(0),
919            )
920            .unwrap();
921        assert_eq!(memory_events, 2);
922        let sessions: i64 = conn
923            .query_row(
924                "SELECT COUNT(*) FROM graph_nodes WHERE kind = 'memory_session'",
925                [],
926                |row| row.get(0),
927            )
928            .unwrap();
929        assert_eq!(sessions, 1);
930    }
931
932    #[test]
933    fn traversal_projection_adds_semantic_memory_rows() {
934        let dir = TempDir::new().unwrap();
935        let event = MemoryEvent::new(
936            MemoryEventKind::ResponseSummary,
937            "session.md",
938            "semantic memory graph",
939        )
940        .with_session_id("sess-1")
941        .with_observed_at_unix(1_700_000_000);
942        let mut nodes = Vec::new();
943        let mut edges = Vec::new();
944        append_memory_events_as_traversal_rows(dir.path(), &[event], &mut nodes, &mut edges)
945            .unwrap();
946
947        assert!(nodes.iter().any(|node| node.kind == "memory_event"));
948        assert!(nodes.iter().any(|node| {
949            node.kind == "semantic_concept"
950                && node.properties.get("provider") == Some(&"tsift-memory".to_string())
951        }));
952        assert!(edges.iter().any(|edge| edge.kind == "mentions_concept"));
953    }
954}