Skip to main content

tsift_memory/
lib.rs

1use anyhow::{Context, Result, bail};
2use rusqlite::{Connection, OpenFlags, OptionalExtension, params, params_from_iter};
3use serde::{Deserialize, Serialize};
4use std::collections::{BTreeMap, BTreeSet};
5use std::path::{Path, PathBuf};
6use tsift_core::{GraphEdge, GraphNode, GraphProjection, GraphProvenance};
7
/// Contract version string stamped on capture handoff plans.
pub const MEMORY_CONTRACT_VERSION: &str = "tsift-memory-v1";
/// Schema version; matches the row the schema SQL inserts into
/// `memory_schema_versions`.
pub const MEMORY_SCHEMA_VERSION: i64 = 1;
/// Default total prompt-token budget used by `MemoryBudget::default`.
pub const DEFAULT_MAX_PROMPT_TOKENS: usize = 4_096;
/// Default number of tokens subtracted from the prompt budget before planning.
pub const DEFAULT_RESERVE_TOKENS: usize = 512;
/// Default per-event token ceiling used by `MemoryBudget::default`.
pub const DEFAULT_MAX_EVENT_TOKENS: usize = 1_536;
/// Contract version string stamped on budget-guard reports.
pub const MEMORY_BUDGET_GUARD_CONTRACT_VERSION: &str = "tsift-memory-budget-guard-v1";

// NOTE(review): not referenced in the visible part of this file; presumably
// caps how many event ids an import lists — confirm against the import code.
const MAX_IMPORT_EVENT_IDS: usize = 100;
16
/// DDL batch that creates the memory store schema.
///
/// Every statement is idempotent (`IF NOT EXISTS` / `INSERT OR IGNORE`), so the
/// batch can be executed each time a database is opened.
const MEMORY_SCHEMA_SQL: &str = r#"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS memory_schema_versions (
  version INTEGER PRIMARY KEY,
  applied_at_unix INTEGER NOT NULL
);

INSERT OR IGNORE INTO memory_schema_versions(version, applied_at_unix)
VALUES (1, strftime('%s','now'));

CREATE TABLE IF NOT EXISTS memory_events (
  id TEXT PRIMARY KEY,
  kind TEXT NOT NULL,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  text TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  imported_from TEXT,
  imported_id TEXT,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE INDEX IF NOT EXISTS idx_memory_events_kind
ON memory_events(kind);

CREATE INDEX IF NOT EXISTS idx_memory_events_session
ON memory_events(session_id);

CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_events_import_source
ON memory_events(imported_from, imported_id)
WHERE imported_from IS NOT NULL AND imported_id IS NOT NULL;

CREATE TABLE IF NOT EXISTS memory_session_summaries (
  id TEXT PRIMARY KEY,
  session_id TEXT NOT NULL,
  summary TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_artifacts (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  artifact_kind TEXT NOT NULL,
  path TEXT,
  handle TEXT,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  token_estimate INTEGER NOT NULL DEFAULT 0,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_tool_spans (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  tool_name TEXT NOT NULL,
  input_artifact_id TEXT,
  output_artifact_id TEXT,
  status TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  started_at_unix INTEGER,
  completed_at_unix INTEGER,
  FOREIGN KEY(input_artifact_id) REFERENCES memory_artifacts(id),
  FOREIGN KEY(output_artifact_id) REFERENCES memory_artifacts(id)
);

CREATE TABLE IF NOT EXISTS memory_embeddings (
  id TEXT PRIMARY KEY,
  owner_kind TEXT NOT NULL,
  owner_id TEXT NOT NULL,
  provider TEXT NOT NULL,
  model TEXT NOT NULL,
  vector_ref TEXT NOT NULL,
  dimensions INTEGER,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_graph_links (
  id TEXT PRIMARY KEY,
  memory_event_id TEXT NOT NULL,
  graph_node_id TEXT NOT NULL,
  graph_edge_id TEXT,
  link_kind TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  FOREIGN KEY(memory_event_id) REFERENCES memory_events(id)
);

CREATE TABLE IF NOT EXISTS memory_import_runs (
  id TEXT PRIMARY KEY,
  source TEXT NOT NULL,
  source_ref TEXT NOT NULL,
  status TEXT NOT NULL,
  imported_events INTEGER NOT NULL DEFAULT 0,
  warnings_json TEXT NOT NULL DEFAULT '[]',
  started_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  completed_at_unix INTEGER
);
"#;

/// Returns the schema DDL batch so callers can inspect or apply it themselves.
pub fn memory_schema_sql() -> &'static str {
    MEMORY_SCHEMA_SQL
}
126
/// Returns the default memory database location for a project:
/// `<project_root>/.tsift/memory.db`.
pub fn default_memory_db_path(project_root: &Path) -> PathBuf {
    let mut db_path = project_root.to_path_buf();
    db_path.push(".tsift");
    db_path.push("memory.db");
    db_path
}
130
/// Returns `$HOME/.claude-mem/claude-mem.db`, or `None` when no home directory
/// is configured.
///
/// An empty `HOME` is treated the same as an unset one: joining onto an empty
/// path would otherwise produce a relative `.claude-mem/claude-mem.db` that
/// silently depends on the current working directory.
pub fn default_claude_mem_db_path() -> Option<PathBuf> {
    let home = std::env::var_os("HOME").filter(|value| !value.is_empty())?;
    Some(PathBuf::from(home).join(".claude-mem").join("claude-mem.db"))
}
136
/// Rough token estimate for `text`: one token per four bytes of the trimmed
/// input, rounded up. Empty or whitespace-only input estimates to zero.
pub fn estimate_tokens(text: &str) -> usize {
    // Zero trimmed bytes divide to zero tokens, so no special case is needed.
    text.trim().len().div_ceil(4)
}
141
142#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
143#[serde(rename_all = "snake_case")]
144pub enum MemoryEventKind {
145    PromptTarget,
146    ToolCall,
147    ToolResultArtifact,
148    ResponseSummary,
149    CloseoutProof,
150    SessionCheck,
151    ImportedObservation,
152    ImportedSessionSummary,
153    ImportedUserPrompt,
154}
155
156impl MemoryEventKind {
157    pub fn as_str(self) -> &'static str {
158        match self {
159            Self::PromptTarget => "prompt_target",
160            Self::ToolCall => "tool_call",
161            Self::ToolResultArtifact => "tool_result_artifact",
162            Self::ResponseSummary => "response_summary",
163            Self::CloseoutProof => "closeout_proof",
164            Self::SessionCheck => "session_check",
165            Self::ImportedObservation => "imported_observation",
166            Self::ImportedSessionSummary => "imported_session_summary",
167            Self::ImportedUserPrompt => "imported_user_prompt",
168        }
169    }
170
171    pub fn parse(raw: &str) -> Result<Self> {
172        match raw {
173            "prompt_target" => Ok(Self::PromptTarget),
174            "tool_call" => Ok(Self::ToolCall),
175            "tool_result_artifact" => Ok(Self::ToolResultArtifact),
176            "response_summary" => Ok(Self::ResponseSummary),
177            "closeout_proof" => Ok(Self::CloseoutProof),
178            "session_check" => Ok(Self::SessionCheck),
179            "imported_observation" => Ok(Self::ImportedObservation),
180            "imported_session_summary" => Ok(Self::ImportedSessionSummary),
181            "imported_user_prompt" => Ok(Self::ImportedUserPrompt),
182            other => bail!("unsupported memory event kind `{other}`"),
183        }
184    }
185}
186
187#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
188pub struct MemoryEvent {
189    pub kind: MemoryEventKind,
190    #[serde(skip_serializing_if = "Option::is_none")]
191    pub session_id: Option<String>,
192    pub source_ref: String,
193    pub text: String,
194    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
195    pub metadata: BTreeMap<String, String>,
196    #[serde(skip_serializing_if = "Option::is_none")]
197    pub observed_at_unix: Option<i64>,
198    pub token_estimate: usize,
199    #[serde(skip_serializing_if = "Option::is_none")]
200    pub imported_from: Option<String>,
201    #[serde(skip_serializing_if = "Option::is_none")]
202    pub imported_id: Option<String>,
203}
204
205impl MemoryEvent {
206    pub fn new(
207        kind: MemoryEventKind,
208        source_ref: impl Into<String>,
209        text: impl Into<String>,
210    ) -> Self {
211        let text = text.into();
212        Self {
213            kind,
214            session_id: None,
215            source_ref: source_ref.into(),
216            token_estimate: estimate_tokens(&text),
217            text,
218            metadata: BTreeMap::new(),
219            observed_at_unix: None,
220            imported_from: None,
221            imported_id: None,
222        }
223    }
224
225    pub fn with_session_id(mut self, session_id: impl Into<String>) -> Self {
226        self.session_id = Some(session_id.into());
227        self
228    }
229
230    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
231        self.metadata.insert(key.into(), value.into());
232        self
233    }
234
235    pub fn with_observed_at_unix(mut self, observed_at_unix: i64) -> Self {
236        self.observed_at_unix = Some(observed_at_unix);
237        self
238    }
239
240    pub fn with_import(mut self, source: impl Into<String>, id: impl Into<String>) -> Self {
241        self.imported_from = Some(source.into());
242        self.imported_id = Some(id.into());
243        self
244    }
245
246    pub fn stable_id(&self) -> String {
247        let raw = serde_json::json!([
248            self.kind.as_str(),
249            self.session_id,
250            self.source_ref,
251            self.text,
252            self.observed_at_unix,
253            self.imported_from,
254            self.imported_id
255        ])
256        .to_string();
257        format!("memevt:{}", blake3::hash(raw.as_bytes()).to_hex())
258    }
259}
260
261#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
262pub struct MemoryBudget {
263    pub max_prompt_tokens: usize,
264    pub reserve_tokens: usize,
265    pub max_event_tokens: usize,
266}
267
268impl Default for MemoryBudget {
269    fn default() -> Self {
270        Self {
271            max_prompt_tokens: DEFAULT_MAX_PROMPT_TOKENS,
272            reserve_tokens: DEFAULT_RESERVE_TOKENS,
273            max_event_tokens: DEFAULT_MAX_EVENT_TOKENS,
274        }
275    }
276}
277
278impl MemoryBudget {
279    pub fn new(max_prompt_tokens: usize) -> Self {
280        Self {
281            max_prompt_tokens,
282            ..Self::default()
283        }
284    }
285
286    pub fn available_tokens(self) -> usize {
287        self.max_prompt_tokens.saturating_sub(self.reserve_tokens)
288    }
289}
290
291#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
292pub struct BudgetedMemoryEvent {
293    pub id: String,
294    pub kind: String,
295    pub source_ref: String,
296    pub token_estimate: usize,
297}
298
299#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
300pub struct DeferredMemoryEvent {
301    pub id: String,
302    pub kind: String,
303    pub source_ref: String,
304    pub token_estimate: usize,
305    pub reason: String,
306    pub recommended_chunks: usize,
307}
308
309#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
310pub struct MemoryHandoffPlan {
311    pub contract_version: String,
312    pub status: String,
313    pub max_prompt_tokens: usize,
314    pub reserve_tokens: usize,
315    pub available_tokens: usize,
316    pub estimated_included_tokens: usize,
317    pub included_events: Vec<BudgetedMemoryEvent>,
318    pub deferred_events: Vec<DeferredMemoryEvent>,
319    pub next_commands: Vec<String>,
320}
321
322pub fn plan_capture_handoff(events: &[MemoryEvent], budget: MemoryBudget) -> MemoryHandoffPlan {
323    let mut used = 0usize;
324    let mut included_events = Vec::new();
325    let mut deferred_events = Vec::new();
326    let available = budget.available_tokens();
327
328    for event in events {
329        let id = event.stable_id();
330        if event.token_estimate > budget.max_event_tokens {
331            deferred_events.push(DeferredMemoryEvent {
332                id,
333                kind: event.kind.as_str().to_string(),
334                source_ref: event.source_ref.clone(),
335                token_estimate: event.token_estimate,
336                reason: "event_exceeds_max_event_tokens".to_string(),
337                recommended_chunks: event
338                    .token_estimate
339                    .div_ceil(budget.max_event_tokens.max(1)),
340            });
341            continue;
342        }
343
344        if used + event.token_estimate <= available {
345            used += event.token_estimate;
346            included_events.push(BudgetedMemoryEvent {
347                id,
348                kind: event.kind.as_str().to_string(),
349                source_ref: event.source_ref.clone(),
350                token_estimate: event.token_estimate,
351            });
352        } else {
353            deferred_events.push(DeferredMemoryEvent {
354                id,
355                kind: event.kind.as_str().to_string(),
356                source_ref: event.source_ref.clone(),
357                token_estimate: event.token_estimate,
358                reason: "handoff_budget_exhausted".to_string(),
359                recommended_chunks: 1,
360            });
361        }
362    }
363
364    MemoryHandoffPlan {
365        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
366        status: if deferred_events.is_empty() {
367            "ready".to_string()
368        } else {
369            "split_required".to_string()
370        },
371        max_prompt_tokens: budget.max_prompt_tokens,
372        reserve_tokens: budget.reserve_tokens,
373        available_tokens: available,
374        estimated_included_tokens: used,
375        included_events,
376        deferred_events,
377        next_commands: vec![
378            "tsift memory handoff-plan '<event text>' --budget-tokens <n> --json".to_string(),
379            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
380        ],
381    }
382}
383
384#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
385pub struct MemoryBudgetGuardInput {
386    pub source_ref: String,
387    pub payload_kind: String,
388    pub text: String,
389}
390
391impl MemoryBudgetGuardInput {
392    pub fn new(
393        source_ref: impl Into<String>,
394        payload_kind: impl Into<String>,
395        text: impl Into<String>,
396    ) -> Self {
397        Self {
398            source_ref: source_ref.into(),
399            payload_kind: payload_kind.into(),
400            text: text.into(),
401        }
402    }
403}
404
405#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
406pub struct MemoryBudgetReplacement {
407    pub strategy: String,
408    pub artifact_ref: String,
409    pub digest_command: String,
410    pub context_command: String,
411    pub session_review_command: String,
412}
413
414#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
415pub struct MemoryRetryChunk {
416    pub index: usize,
417    pub source_ref: String,
418    pub byte_start: usize,
419    pub byte_end: usize,
420    pub token_estimate: usize,
421    pub retry_command: String,
422}
423
424#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
425pub struct MemoryBudgetGuardReport {
426    pub contract_version: String,
427    pub status: String,
428    pub allowed: bool,
429    pub source_ref: String,
430    pub payload_kind: String,
431    pub byte_count: usize,
432    pub estimated_tokens: usize,
433    pub max_prompt_tokens: usize,
434    pub reserve_tokens: usize,
435    pub available_tokens: usize,
436    pub max_chunk_tokens: usize,
437    #[serde(skip_serializing_if = "Option::is_none")]
438    pub replacement: Option<MemoryBudgetReplacement>,
439    pub retryable_chunk_plan: Vec<MemoryRetryChunk>,
440    pub warnings: Vec<String>,
441    pub next_commands: Vec<String>,
442}
443
444pub fn guard_memory_handoff(
445    input: MemoryBudgetGuardInput,
446    budget: MemoryBudget,
447) -> MemoryBudgetGuardReport {
448    let estimated_tokens = estimate_tokens(&input.text);
449    let available_tokens = budget.available_tokens();
450    let allowed =
451        estimated_tokens <= available_tokens && estimated_tokens <= budget.max_event_tokens;
452    let replacement = (!allowed).then(|| replacement_for_budget_guard(&input));
453    let retryable_chunk_plan = if allowed {
454        Vec::new()
455    } else {
456        retry_chunks_for_budget_guard(&input, budget.max_event_tokens.min(available_tokens).max(1))
457    };
458    let mut warnings = Vec::new();
459    if estimated_tokens > available_tokens {
460        warnings.push("payload_exceeds_available_prompt_budget".to_string());
461    }
462    if estimated_tokens > budget.max_event_tokens {
463        warnings.push("payload_exceeds_max_chunk_tokens".to_string());
464    }
465
466    MemoryBudgetGuardReport {
467        contract_version: MEMORY_BUDGET_GUARD_CONTRACT_VERSION.to_string(),
468        status: if allowed {
469            "ready".to_string()
470        } else {
471            "blocked_split_required".to_string()
472        },
473        allowed,
474        source_ref: input.source_ref.clone(),
475        payload_kind: input.payload_kind.clone(),
476        byte_count: input.text.len(),
477        estimated_tokens,
478        max_prompt_tokens: budget.max_prompt_tokens,
479        reserve_tokens: budget.reserve_tokens,
480        available_tokens,
481        max_chunk_tokens: budget.max_event_tokens,
482        replacement,
483        retryable_chunk_plan,
484        warnings,
485        next_commands: vec![
486            format!(
487                "tsift memory budget-guard --file {} --json",
488                shell_quote(&input.source_ref)
489            ),
490            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
491            "tsift --envelope session-review <session.md> --next-context --budget normal"
492                .to_string(),
493        ],
494    }
495}
496
497fn replacement_for_budget_guard(input: &MemoryBudgetGuardInput) -> MemoryBudgetReplacement {
498    let quoted_ref = shell_quote(&input.source_ref);
499    let is_transcript = matches!(
500        input.payload_kind.as_str(),
501        "transcript" | "session" | "session_transcript" | "agent_doc"
502    ) || input.source_ref.ends_with(".jsonl")
503        || input.source_ref.ends_with(".md");
504    let strategy = if is_transcript {
505        "replace_raw_transcript_with_session_review_or_context_pack_handle"
506    } else {
507        "replace_raw_tool_or_log_payload_with_digest_artifact_handle"
508    };
509    MemoryBudgetReplacement {
510        strategy: strategy.to_string(),
511        artifact_ref: format!(
512            "artifact:{}",
513            blake3::hash(input.source_ref.as_bytes()).to_hex()
514        ),
515        digest_command: if is_transcript {
516            format!("tsift --envelope session-review {quoted_ref} --next-context --budget normal")
517        } else {
518            format!("tsift log-digest --path . --input {quoted_ref} --json")
519        },
520        context_command: format!("tsift --envelope context-pack {quoted_ref} --budget normal"),
521        session_review_command: format!(
522            "tsift --envelope session-review {quoted_ref} --next-context --budget normal"
523        ),
524    }
525}
526
527fn retry_chunks_for_budget_guard(
528    input: &MemoryBudgetGuardInput,
529    max_chunk_tokens: usize,
530) -> Vec<MemoryRetryChunk> {
531    let byte_budget = max_chunk_tokens.saturating_mul(4).max(1);
532    let mut chunks = Vec::new();
533    let mut start = 0usize;
534    while start < input.text.len() {
535        let mut end = (start + byte_budget).min(input.text.len());
536        while end > start && !input.text.is_char_boundary(end) {
537            end -= 1;
538        }
539        if end == start {
540            if let Some((offset, ch)) = input.text[start..].char_indices().next() {
541                end = start + offset + ch.len_utf8();
542            } else {
543                break;
544            }
545        }
546        let token_estimate = estimate_tokens(&input.text[start..end]);
547        let index = chunks.len() + 1;
548        chunks.push(MemoryRetryChunk {
549            index,
550            source_ref: format!("{}#chunk-{index}", input.source_ref),
551            byte_start: start,
552            byte_end: end,
553            token_estimate,
554            retry_command: retry_chunk_command(input, index, start, end),
555        });
556        start = end;
557    }
558    chunks
559}
560
561fn retry_chunk_command(
562    input: &MemoryBudgetGuardInput,
563    index: usize,
564    byte_start: usize,
565    byte_end: usize,
566) -> String {
567    let chunk_ref = format!("{}#chunk-{index}", input.source_ref);
568    if input.source_ref == "inline" {
569        format!(
570            "tsift memory budget-guard --text '<chunk {index} payload>' --source-ref {} --budget-tokens <n> --json",
571            shell_quote(&chunk_ref)
572        )
573    } else {
574        format!(
575            "tsift memory budget-guard --file {} --byte-start {byte_start} --byte-end {byte_end} --source-ref {} --budget-tokens <n> --json",
576            shell_quote(&input.source_ref),
577            shell_quote(&chunk_ref)
578        )
579    }
580}
581
/// Quotes `s` for use as a single POSIX shell word.
///
/// Non-empty strings made only of alphanumerics, `_`, `-`, `.` and `/` are
/// returned unchanged. Anything else is wrapped in single quotes, with embedded
/// single quotes written as `'\''`. The empty string becomes `''`; returned
/// bare, it would disappear from the command line and shift later arguments.
fn shell_quote(s: &str) -> String {
    let is_safe = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '/');
    if !s.is_empty() && s.chars().all(is_safe) {
        s.to_string()
    } else {
        format!("'{}'", s.replace('\'', "'\\''"))
    }
}
591
592#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
593pub struct MemoryHookSpec {
594    pub name: String,
595    pub event_kind: String,
596    pub required_fields: Vec<String>,
597    pub budget_behavior: String,
598}
599
600pub fn agent_doc_hook_contract() -> Vec<MemoryHookSpec> {
601    vec![
602        MemoryHookSpec {
603            name: "prompt_target".to_string(),
604            event_kind: MemoryEventKind::PromptTarget.as_str().to_string(),
605            required_fields: vec!["session_path".to_string(), "prompt_target".to_string()],
606            budget_behavior:
607                "capture text plus graph/source handles; never inline raw transcript blobs"
608                    .to_string(),
609        },
610        MemoryHookSpec {
611            name: "tool_result_artifact".to_string(),
612            event_kind: MemoryEventKind::ToolResultArtifact.as_str().to_string(),
613            required_fields: vec!["tool_name".to_string(), "artifact_handle".to_string()],
614            budget_behavior: "store artifact handle and digest, not full raw output".to_string(),
615        },
616        MemoryHookSpec {
617            name: "response_summary".to_string(),
618            event_kind: MemoryEventKind::ResponseSummary.as_str().to_string(),
619            required_fields: vec!["response_topic".to_string(), "summary".to_string()],
620            budget_behavior: "summaries are capped before write and linked to source handles"
621                .to_string(),
622        },
623        MemoryHookSpec {
624            name: "closeout_proof".to_string(),
625            event_kind: MemoryEventKind::CloseoutProof.as_str().to_string(),
626            required_fields: vec!["commit_hash".to_string(), "changed_paths".to_string()],
627            budget_behavior: "proof records are structured metadata with compact prose".to_string(),
628        },
629        MemoryHookSpec {
630            name: "session_check".to_string(),
631            event_kind: MemoryEventKind::SessionCheck.as_str().to_string(),
632            required_fields: vec!["status".to_string(), "session_path".to_string()],
633            budget_behavior: "persist pass/fail state and recovery commands".to_string(),
634        },
635    ]
636}
637
638pub fn agent_doc_closeout_events(
639    session_path: &Path,
640    prompt_target: &str,
641    response_summary: &str,
642    commit_hash: Option<&str>,
643    session_check_status: &str,
644) -> Vec<MemoryEvent> {
645    let session_id = session_path.display().to_string();
646    let mut events = vec![
647        MemoryEvent::new(
648            MemoryEventKind::PromptTarget,
649            session_path.display().to_string(),
650            prompt_target,
651        )
652        .with_session_id(session_id.clone()),
653        MemoryEvent::new(
654            MemoryEventKind::ResponseSummary,
655            session_path.display().to_string(),
656            response_summary,
657        )
658        .with_session_id(session_id.clone()),
659        MemoryEvent::new(
660            MemoryEventKind::SessionCheck,
661            session_path.display().to_string(),
662            session_check_status,
663        )
664        .with_session_id(session_id.clone())
665        .with_metadata("status", session_check_status),
666    ];
667
668    if let Some(commit_hash) = commit_hash {
669        events.push(
670            MemoryEvent::new(
671                MemoryEventKind::CloseoutProof,
672                session_path.display().to_string(),
673                commit_hash,
674            )
675            .with_session_id(session_id)
676            .with_metadata("commit_hash", commit_hash),
677        );
678    }
679
680    events
681}
682
/// Outcome of one event insert.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryInsertResult {
    /// Stable id computed for the event.
    pub id: String,
    /// `true` when a new row was written, `false` when the insert was ignored.
    pub inserted: bool,
}
688
689pub struct MemoryStore {
690    conn: Connection,
691}
692
693impl MemoryStore {
694    pub fn open_or_create(path: &Path) -> Result<Self> {
695        if let Some(parent) = path.parent() {
696            std::fs::create_dir_all(parent)
697                .with_context(|| format!("create {}", parent.display()))?;
698        }
699        let conn = Connection::open(path).with_context(|| format!("open {}", path.display()))?;
700        conn.execute_batch(MEMORY_SCHEMA_SQL)
701            .with_context(|| format!("initialize {}", path.display()))?;
702        Ok(Self { conn })
703    }
704
705    pub fn insert_event(&self, event: &MemoryEvent) -> Result<String> {
706        Ok(self.insert_event_result(event)?.id)
707    }
708
709    pub fn insert_event_result(&self, event: &MemoryEvent) -> Result<MemoryInsertResult> {
710        insert_event_on(&self.conn, event)
711    }
712
713    pub fn insert_events(&mut self, events: &[MemoryEvent]) -> Result<Vec<MemoryInsertResult>> {
714        let tx = self.conn.transaction()?;
715        let mut results = Vec::with_capacity(events.len());
716        for event in events {
717            results.push(insert_event_on(&tx, event)?);
718        }
719        tx.commit()?;
720        Ok(results)
721    }
722
723    pub fn event_count(&self) -> Result<usize> {
724        let count: i64 = self
725            .conn
726            .query_row("SELECT COUNT(*) FROM memory_events", [], |row| row.get(0))?;
727        Ok(count as usize)
728    }
729}
730
731fn insert_event_on(conn: &Connection, event: &MemoryEvent) -> Result<MemoryInsertResult> {
732    let id = event.stable_id();
733    let metadata_json = serde_json::to_string(&event.metadata)?;
734    let changed = conn.execute(
735        r#"
736            INSERT OR IGNORE INTO memory_events(
737              id, kind, session_id, source_ref, text, metadata_json,
738              observed_at_unix, token_estimate, imported_from, imported_id
739            )
740            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
741            "#,
742        params![
743            &id,
744            event.kind.as_str(),
745            event.session_id.as_deref(),
746            &event.source_ref,
747            &event.text,
748            &metadata_json,
749            event.observed_at_unix,
750            event.token_estimate as i64,
751            event.imported_from.as_deref(),
752            event.imported_id.as_deref()
753        ],
754    )?;
755    Ok(MemoryInsertResult {
756        id,
757        inserted: changed > 0,
758    })
759}
760
761pub fn read_memory_events(memory_db_path: &Path, limit: usize) -> Result<Vec<MemoryEvent>> {
762    if !memory_db_path.exists() {
763        return Ok(Vec::new());
764    }
765    let conn = Connection::open_with_flags(
766        memory_db_path,
767        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
768    )
769    .with_context(|| format!("open memory db {}", memory_db_path.display()))?;
770    let mut stmt = conn.prepare(
771        r#"
772        SELECT kind, session_id, source_ref, text, metadata_json,
773               observed_at_unix, token_estimate, imported_from, imported_id
774        FROM memory_events
775        ORDER BY COALESCE(observed_at_unix, created_at_unix), id
776        LIMIT ?1
777        "#,
778    )?;
779    let mut rows = stmt.query([limit as i64])?;
780    let mut events = Vec::new();
781    while let Some(row) = rows.next()? {
782        let kind_raw: String = row.get(0)?;
783        let session_id: Option<String> = row.get(1)?;
784        let source_ref: String = row.get(2)?;
785        let text: String = row.get(3)?;
786        let metadata_json: String = row.get(4)?;
787        let observed_at_unix: Option<i64> = row.get(5)?;
788        let token_estimate: i64 = row.get(6)?;
789        let imported_from: Option<String> = row.get(7)?;
790        let imported_id: Option<String> = row.get(8)?;
791        let metadata = serde_json::from_str::<BTreeMap<String, String>>(&metadata_json)
792            .with_context(|| format!("parse memory metadata for {source_ref}"))?;
793        let mut event = MemoryEvent::new(MemoryEventKind::parse(&kind_raw)?, source_ref, text);
794        event.session_id = session_id;
795        event.metadata = metadata;
796        event.observed_at_unix = observed_at_unix;
797        event.token_estimate = token_estimate.max(0) as usize;
798        event.imported_from = imported_from;
799        event.imported_id = imported_id;
800        events.push(event);
801    }
802    Ok(events)
803}
804
805#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
806pub struct ClaudeMemTablePlan {
807    pub table: String,
808    pub supported: bool,
809    pub rows: usize,
810    pub columns: Vec<String>,
811    #[serde(skip_serializing_if = "Option::is_none")]
812    pub unsupported_reason: Option<String>,
813}
814
815#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
816pub struct ClaudeMemImportPlan {
817    pub db_path: String,
818    pub exists: bool,
819    pub readable: bool,
820    #[serde(skip_serializing_if = "Option::is_none")]
821    pub chroma_path: Option<String>,
822    pub chroma_present: bool,
823    pub observations: ClaudeMemTablePlan,
824    pub session_summaries: ClaudeMemTablePlan,
825    pub user_prompts: ClaudeMemTablePlan,
826    pub pending_messages: ClaudeMemTablePlan,
827    pub warnings: Vec<String>,
828    pub next_commands: Vec<String>,
829}
830
831#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
832pub struct ClaudeMemTableReconciliation {
833    pub table: String,
834    pub source_rows: usize,
835    pub read_events: usize,
836    pub planned_events: usize,
837    pub imported_events: usize,
838    pub already_present_events: usize,
839    pub skipped_source_rows: usize,
840    #[serde(skip_serializing_if = "Option::is_none")]
841    pub limit_per_table: Option<usize>,
842    pub complete: bool,
843}
844
845#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
846pub struct ClaudeMemImportReconciliation {
847    pub observations: ClaudeMemTableReconciliation,
848    pub session_summaries: ClaudeMemTableReconciliation,
849    pub user_prompts: ClaudeMemTableReconciliation,
850    pub total_source_rows: usize,
851    pub total_read_events: usize,
852    pub total_planned_events: usize,
853    pub total_imported_events: usize,
854    pub total_already_present_events: usize,
855    pub total_skipped_source_rows: usize,
856    pub complete: bool,
857}
858
859fn empty_table_plan(table: &str) -> ClaudeMemTablePlan {
860    ClaudeMemTablePlan {
861        table: table.to_string(),
862        supported: false,
863        rows: 0,
864        columns: Vec::new(),
865        unsupported_reason: None,
866    }
867}
868
869const PENDING_MESSAGES_EXCLUSION_REASON: &str = "pending_messages is intentionally excluded from claude-mem import because it contains transient queue state and raw tool payload fields, not durable replacement memory";
870
871pub fn inspect_claude_mem(db_path: &Path) -> Result<ClaudeMemImportPlan> {
872    let chroma_path = db_path.parent().map(|parent| parent.join("chroma"));
873    let chroma_present = chroma_path.as_ref().is_some_and(|path| path.exists());
874    let mut plan = ClaudeMemImportPlan {
875        db_path: db_path.display().to_string(),
876        exists: db_path.exists(),
877        readable: false,
878        chroma_path: chroma_path.as_ref().map(|path| path.display().to_string()),
879        chroma_present,
880        observations: empty_table_plan("observations"),
881        session_summaries: empty_table_plan("session_summaries"),
882        user_prompts: empty_table_plan("user_prompts"),
883        pending_messages: empty_table_plan("pending_messages"),
884        warnings: Vec::new(),
885        next_commands: vec![
886            "tsift memory import-claude-mem . --all --apply --json".to_string(),
887            "tsift graph-db --path . --json refresh".to_string(),
888        ],
889    };
890
891    if !plan.exists {
892        plan.warnings
893            .push("claude-mem database not found; pass --db to inspect another path".to_string());
894        return Ok(plan);
895    }
896
897    let conn = Connection::open_with_flags(
898        db_path,
899        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
900    )
901    .with_context(|| format!("open claude-mem db {}", db_path.display()))?;
902    plan.readable = true;
903    plan.observations = inspect_table(&conn, "observations", true, None)?;
904    plan.session_summaries = inspect_table(&conn, "session_summaries", true, None)?;
905    plan.user_prompts = inspect_table(&conn, "user_prompts", true, None)?;
906    plan.pending_messages = inspect_table(
907        &conn,
908        "pending_messages",
909        false,
910        Some(PENDING_MESSAGES_EXCLUSION_REASON),
911    )?;
912    if plan.pending_messages.rows > 0 {
913        plan.warnings
914            .push(PENDING_MESSAGES_EXCLUSION_REASON.to_string());
915    }
916
917    if !plan.chroma_present {
918        plan.warnings
919            .push("chroma directory was not found next to the claude-mem SQLite DB".to_string());
920    }
921
922    Ok(plan)
923}
924
925fn inspect_table(
926    conn: &Connection,
927    table: &str,
928    supported: bool,
929    unsupported_reason: Option<&str>,
930) -> Result<ClaudeMemTablePlan> {
931    if !table_exists(conn, table)? {
932        return Ok(empty_table_plan(table));
933    }
934    let rows: i64 = conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| {
935        row.get(0)
936    })?;
937    let columns = table_columns(conn, table)?;
938    Ok(ClaudeMemTablePlan {
939        table: table.to_string(),
940        supported,
941        rows: rows as usize,
942        columns,
943        unsupported_reason: unsupported_reason.map(str::to_string),
944    })
945}
946
947fn table_exists(conn: &Connection, table: &str) -> Result<bool> {
948    let exists: Option<i64> = conn
949        .query_row(
950            "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name = ?1",
951            [table],
952            |row| row.get(0),
953        )
954        .optional()?;
955    Ok(exists.is_some())
956}
957
958fn table_columns(conn: &Connection, table: &str) -> Result<Vec<String>> {
959    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
960    let mut rows = stmt.query([])?;
961    let mut columns = Vec::new();
962    while let Some(row) = rows.next()? {
963        columns.push(row.get::<_, String>(1)?);
964    }
965    Ok(columns)
966}
967
968#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
969pub struct ClaudeMemImportReport {
970    pub contract_version: String,
971    pub source: String,
972    pub target: String,
973    pub dry_run: bool,
974    #[serde(skip_serializing_if = "Option::is_none")]
975    pub limit_per_table: Option<usize>,
976    pub import_all: bool,
977    pub imported_events: usize,
978    pub already_present_events: usize,
979    pub planned_events: usize,
980    pub event_ids: Vec<String>,
981    pub event_ids_total: usize,
982    pub event_ids_truncated: bool,
983    pub reconciliation: ClaudeMemImportReconciliation,
984    pub plan: ClaudeMemImportPlan,
985}
986
987#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
988pub struct ClaudeMemReadReport {
989    pub contract_version: String,
990    pub source: String,
991    #[serde(skip_serializing_if = "Option::is_none")]
992    pub limit_per_table: Option<usize>,
993    pub import_all: bool,
994    pub events: Vec<MemoryEvent>,
995    pub reconciliation: ClaudeMemImportReconciliation,
996    pub plan: ClaudeMemImportPlan,
997}
998
999pub fn read_claude_mem_events(
1000    source_db_path: &Path,
1001    limit_per_table: Option<usize>,
1002) -> Result<ClaudeMemReadReport> {
1003    let plan = inspect_claude_mem(source_db_path)?;
1004    if !plan.exists || !plan.readable {
1005        let reconciliation =
1006            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1007        return Ok(ClaudeMemReadReport {
1008            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1009            source: source_db_path.display().to_string(),
1010            limit_per_table,
1011            import_all: limit_per_table.is_none(),
1012            events: Vec::new(),
1013            reconciliation,
1014            plan,
1015        });
1016    }
1017
1018    let conn = Connection::open_with_flags(
1019        source_db_path,
1020        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1021    )?;
1022    let mut events = Vec::new();
1023    if plan.observations.supported {
1024        events.extend(read_claude_mem_observations(&conn, limit_per_table)?);
1025    }
1026    if plan.session_summaries.supported {
1027        events.extend(read_claude_mem_summaries(&conn, limit_per_table)?);
1028    }
1029    if plan.user_prompts.supported {
1030        events.extend(read_claude_mem_user_prompts(&conn, limit_per_table)?);
1031    }
1032    let reconciliation =
1033        reconcile_claude_mem_import(&plan, limit_per_table, &events, &BTreeMap::new());
1034
1035    Ok(ClaudeMemReadReport {
1036        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1037        source: source_db_path.display().to_string(),
1038        limit_per_table,
1039        import_all: limit_per_table.is_none(),
1040        events,
1041        reconciliation,
1042        plan,
1043    })
1044}
1045
1046pub fn import_claude_mem(
1047    source_db_path: &Path,
1048    target_memory_db_path: &Path,
1049    limit_per_table: Option<usize>,
1050    dry_run: bool,
1051) -> Result<ClaudeMemImportReport> {
1052    let read_report = read_claude_mem_events(source_db_path, limit_per_table)?;
1053    let plan = read_report.plan;
1054    if !plan.exists || !plan.readable {
1055        let reconciliation =
1056            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1057        return Ok(ClaudeMemImportReport {
1058            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1059            source: source_db_path.display().to_string(),
1060            target: target_memory_db_path.display().to_string(),
1061            dry_run,
1062            limit_per_table,
1063            import_all: limit_per_table.is_none(),
1064            imported_events: 0,
1065            already_present_events: 0,
1066            planned_events: 0,
1067            event_ids: Vec::new(),
1068            event_ids_total: 0,
1069            event_ids_truncated: false,
1070            reconciliation,
1071            plan,
1072        });
1073    }
1074
1075    let events = read_report.events;
1076    let planned_events = events.len();
1077    let mut event_ids = Vec::new();
1078    let mut event_ids_total = 0;
1079    let mut write_results = BTreeMap::new();
1080    if !dry_run {
1081        let mut store = MemoryStore::open_or_create(target_memory_db_path)?;
1082        let results = store.insert_events(&events)?;
1083        for (event, result) in events.iter().zip(results) {
1084            record_claude_mem_write(&mut write_results, event, result.inserted);
1085            event_ids_total += 1;
1086            if event_ids.len() < MAX_IMPORT_EVENT_IDS {
1087                event_ids.push(result.id);
1088            }
1089        }
1090    }
1091    let reconciliation =
1092        reconcile_claude_mem_import(&plan, limit_per_table, &events, &write_results);
1093
1094    Ok(ClaudeMemImportReport {
1095        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1096        source: source_db_path.display().to_string(),
1097        target: target_memory_db_path.display().to_string(),
1098        dry_run,
1099        limit_per_table,
1100        import_all: limit_per_table.is_none(),
1101        imported_events: reconciliation.total_imported_events,
1102        already_present_events: reconciliation.total_already_present_events,
1103        planned_events,
1104        event_ids,
1105        event_ids_total,
1106        event_ids_truncated: event_ids_total > MAX_IMPORT_EVENT_IDS,
1107        reconciliation,
1108        plan,
1109    })
1110}
1111
1112fn read_claude_mem_observations(
1113    conn: &Connection,
1114    limit: Option<usize>,
1115) -> Result<Vec<MemoryEvent>> {
1116    let sql = format!(
1117        r#"
1118        SELECT id, memory_session_id, project, type, title, subtitle, text, facts,
1119               narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
1120               content_hash
1121        FROM observations
1122        ORDER BY created_at_epoch ASC, id ASC{}
1123        "#,
1124        claude_mem_limit_clause(limit)
1125    );
1126    let mut stmt = conn.prepare(&sql)?;
1127    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1128        let id: i64 = row.get(0)?;
1129        let session_id: String = row.get(1)?;
1130        let project: String = row.get(2)?;
1131        let observation_type: String = row.get(3)?;
1132        let title: Option<String> = row.get(4)?;
1133        let subtitle: Option<String> = row.get(5)?;
1134        let text: Option<String> = row.get(6)?;
1135        let facts: Option<String> = row.get(7)?;
1136        let narrative: Option<String> = row.get(8)?;
1137        let concepts: Option<String> = row.get(9)?;
1138        let prompt_number: Option<i64> = row.get(10)?;
1139        let discovery_tokens: i64 = row.get(11)?;
1140        let created_at_epoch: i64 = row.get(12)?;
1141        let content_hash: Option<String> = row.get(13)?;
1142        let body = join_non_empty([title, subtitle, text, facts, narrative, concepts]);
1143        let mut event = MemoryEvent::new(
1144            MemoryEventKind::ImportedObservation,
1145            format!("claude-mem:observations:{id}"),
1146            body,
1147        )
1148        .with_session_id(session_id)
1149        .with_observed_at_unix(created_at_epoch)
1150        .with_import("claude-mem", format!("observations:{id}"))
1151        .with_metadata("project", project)
1152        .with_metadata("observation_type", observation_type)
1153        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1154        if let Some(prompt_number) = prompt_number {
1155            event = event.with_metadata("prompt_number", prompt_number.to_string());
1156        }
1157        if let Some(content_hash) = content_hash {
1158            event = event.with_metadata("content_hash", content_hash);
1159        }
1160        Ok(event)
1161    })?;
1162    collect_rows(rows)
1163}
1164
1165fn read_claude_mem_summaries(conn: &Connection, limit: Option<usize>) -> Result<Vec<MemoryEvent>> {
1166    let sql = format!(
1167        r#"
1168        SELECT id, memory_session_id, project, request, investigated, learned,
1169               completed, next_steps, notes, prompt_number, discovery_tokens,
1170               created_at_epoch
1171        FROM session_summaries
1172        ORDER BY created_at_epoch ASC, id ASC{}
1173        "#,
1174        claude_mem_limit_clause(limit)
1175    );
1176    let mut stmt = conn.prepare(&sql)?;
1177    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1178        let id: i64 = row.get(0)?;
1179        let session_id: String = row.get(1)?;
1180        let project: String = row.get(2)?;
1181        let request: Option<String> = row.get(3)?;
1182        let investigated: Option<String> = row.get(4)?;
1183        let learned: Option<String> = row.get(5)?;
1184        let completed: Option<String> = row.get(6)?;
1185        let next_steps: Option<String> = row.get(7)?;
1186        let notes: Option<String> = row.get(8)?;
1187        let prompt_number: Option<i64> = row.get(9)?;
1188        let discovery_tokens: i64 = row.get(10)?;
1189        let created_at_epoch: i64 = row.get(11)?;
1190        let body = join_non_empty([request, investigated, learned, completed, next_steps, notes]);
1191        let mut event = MemoryEvent::new(
1192            MemoryEventKind::ImportedSessionSummary,
1193            format!("claude-mem:session_summaries:{id}"),
1194            body,
1195        )
1196        .with_session_id(session_id)
1197        .with_observed_at_unix(created_at_epoch)
1198        .with_import("claude-mem", format!("session_summaries:{id}"))
1199        .with_metadata("project", project)
1200        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1201        if let Some(prompt_number) = prompt_number {
1202            event = event.with_metadata("prompt_number", prompt_number.to_string());
1203        }
1204        Ok(event)
1205    })?;
1206    collect_rows(rows)
1207}
1208
1209fn read_claude_mem_user_prompts(
1210    conn: &Connection,
1211    limit: Option<usize>,
1212) -> Result<Vec<MemoryEvent>> {
1213    let sql = format!(
1214        r#"
1215        SELECT id, content_session_id, prompt_number, prompt_text, created_at_epoch
1216        FROM user_prompts
1217        ORDER BY created_at_epoch ASC, id ASC{}
1218        "#,
1219        claude_mem_limit_clause(limit)
1220    );
1221    let mut stmt = conn.prepare(&sql)?;
1222    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1223        let id: i64 = row.get(0)?;
1224        let session_id: String = row.get(1)?;
1225        let prompt_number: i64 = row.get(2)?;
1226        let prompt_text: String = row.get(3)?;
1227        let created_at_epoch: i64 = row.get(4)?;
1228        Ok(MemoryEvent::new(
1229            MemoryEventKind::ImportedUserPrompt,
1230            format!("claude-mem:user_prompts:{id}"),
1231            prompt_text,
1232        )
1233        .with_session_id(session_id)
1234        .with_observed_at_unix(created_at_epoch)
1235        .with_import("claude-mem", format!("user_prompts:{id}"))
1236        .with_metadata("prompt_number", prompt_number.to_string()))
1237    })?;
1238    collect_rows(rows)
1239}
1240
/// Returns the SQL suffix appended to the claude-mem read queries: a `LIMIT ?1` clause when a
/// limit is given (the value itself is bound separately), otherwise the empty string.
fn claude_mem_limit_clause(limit: Option<usize>) -> &'static str {
    match limit {
        Some(_) => "\n        LIMIT ?1",
        None => "",
    }
}
1248
1249fn record_claude_mem_write(
1250    write_results: &mut BTreeMap<String, ClaudeMemTableWriteCounts>,
1251    event: &MemoryEvent,
1252    inserted: bool,
1253) {
1254    let Some(table) = claude_mem_event_table(event) else {
1255        return;
1256    };
1257    let counts = write_results.entry(table.to_string()).or_default();
1258    if inserted {
1259        counts.imported_events += 1;
1260    } else {
1261        counts.already_present_events += 1;
1262    }
1263}
1264
/// Per-table tally of insert outcomes; both counters start at zero via `Default`.
#[derive(Debug, Clone, Copy, Default)]
struct ClaudeMemTableWriteCounts {
    /// Events whose insert reported `inserted == true`.
    imported_events: usize,
    /// Events whose insert reported `inserted == false`.
    already_present_events: usize,
}
1270
1271fn reconcile_claude_mem_import(
1272    plan: &ClaudeMemImportPlan,
1273    limit_per_table: Option<usize>,
1274    events: &[MemoryEvent],
1275    write_results: &BTreeMap<String, ClaudeMemTableWriteCounts>,
1276) -> ClaudeMemImportReconciliation {
1277    let event_counts = claude_mem_event_counts(events);
1278    let observations = reconcile_claude_mem_table(
1279        &plan.observations,
1280        limit_per_table,
1281        event_counts
1282            .get("observations")
1283            .copied()
1284            .unwrap_or_default(),
1285        write_results
1286            .get("observations")
1287            .copied()
1288            .unwrap_or_default(),
1289    );
1290    let session_summaries = reconcile_claude_mem_table(
1291        &plan.session_summaries,
1292        limit_per_table,
1293        event_counts
1294            .get("session_summaries")
1295            .copied()
1296            .unwrap_or_default(),
1297        write_results
1298            .get("session_summaries")
1299            .copied()
1300            .unwrap_or_default(),
1301    );
1302    let user_prompts = reconcile_claude_mem_table(
1303        &plan.user_prompts,
1304        limit_per_table,
1305        event_counts
1306            .get("user_prompts")
1307            .copied()
1308            .unwrap_or_default(),
1309        write_results
1310            .get("user_prompts")
1311            .copied()
1312            .unwrap_or_default(),
1313    );
1314    let total_source_rows =
1315        observations.source_rows + session_summaries.source_rows + user_prompts.source_rows;
1316    let total_read_events =
1317        observations.read_events + session_summaries.read_events + user_prompts.read_events;
1318    let total_planned_events = observations.planned_events
1319        + session_summaries.planned_events
1320        + user_prompts.planned_events;
1321    let total_imported_events = observations.imported_events
1322        + session_summaries.imported_events
1323        + user_prompts.imported_events;
1324    let total_already_present_events = observations.already_present_events
1325        + session_summaries.already_present_events
1326        + user_prompts.already_present_events;
1327    let total_skipped_source_rows = observations.skipped_source_rows
1328        + session_summaries.skipped_source_rows
1329        + user_prompts.skipped_source_rows;
1330    let complete = observations.complete && session_summaries.complete && user_prompts.complete;
1331    ClaudeMemImportReconciliation {
1332        observations,
1333        session_summaries,
1334        user_prompts,
1335        total_source_rows,
1336        total_read_events,
1337        total_planned_events,
1338        total_imported_events,
1339        total_already_present_events,
1340        total_skipped_source_rows,
1341        complete,
1342    }
1343}
1344
1345fn reconcile_claude_mem_table(
1346    table_plan: &ClaudeMemTablePlan,
1347    limit_per_table: Option<usize>,
1348    read_events: usize,
1349    write_counts: ClaudeMemTableWriteCounts,
1350) -> ClaudeMemTableReconciliation {
1351    let source_rows = table_plan.rows;
1352    let skipped_source_rows = source_rows.saturating_sub(read_events);
1353    ClaudeMemTableReconciliation {
1354        table: table_plan.table.clone(),
1355        source_rows,
1356        read_events,
1357        planned_events: read_events,
1358        imported_events: write_counts.imported_events,
1359        already_present_events: write_counts.already_present_events,
1360        skipped_source_rows,
1361        limit_per_table,
1362        complete: skipped_source_rows == 0,
1363    }
1364}
1365
1366fn claude_mem_event_counts(events: &[MemoryEvent]) -> BTreeMap<String, usize> {
1367    let mut counts = BTreeMap::new();
1368    for event in events {
1369        if let Some(table) = claude_mem_event_table(event) {
1370            *counts.entry(table.to_string()).or_insert(0) += 1;
1371        }
1372    }
1373    counts
1374}
1375
1376fn claude_mem_event_table(event: &MemoryEvent) -> Option<&str> {
1377    if event.imported_from.as_deref() != Some("claude-mem") {
1378        return None;
1379    }
1380    event
1381        .imported_id
1382        .as_deref()?
1383        .split_once(':')
1384        .map(|(table, _)| table)
1385}
1386
1387fn collect_rows<T>(rows: impl Iterator<Item = rusqlite::Result<T>>) -> Result<Vec<T>> {
1388    let mut values = Vec::new();
1389    for row in rows {
1390        values.push(row?);
1391    }
1392    Ok(values)
1393}
1394
/// Joins the present, non-blank values with a blank line (`"\n\n"`) between them.
///
/// Each value is trimmed first. `None` entries and values that are empty after trimming are
/// dropped. Returns an empty string when nothing remains.
fn join_non_empty(values: impl IntoIterator<Item = Option<String>>) -> String {
    let mut joined = String::new();
    for value in values.into_iter().flatten() {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            continue;
        }
        // Every kept part is non-empty, so an empty buffer means this is the first part.
        if !joined.is_empty() {
            joined.push_str("\n\n");
        }
        joined.push_str(trimmed);
    }
    joined
}
1404
/// Returns the graph node kind names listed for the memory graph, in a fixed order.
pub fn memory_graph_node_kinds() -> Vec<&'static str> {
    const KINDS: [&str; 6] = [
        "memory_session",
        "memory_event",
        "session",
        "source_handle",
        "semantic_concept",
        "semantic_vector_handle",
    ];
    KINDS.to_vec()
}
1415
1416pub fn project_memory_events(events: &[MemoryEvent]) -> GraphProjection {
1417    let mut projection = GraphProjection::default();
1418    let mut sessions = BTreeSet::new();
1419
1420    for event in events {
1421        let event_id = event.stable_id();
1422        if let Some(session_id) = &event.session_id
1423            && sessions.insert(session_id.clone())
1424        {
1425            projection.nodes.push(
1426                GraphNode::new(
1427                    format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex()),
1428                    "memory_session",
1429                    session_id,
1430                )
1431                .with_property("session_id", session_id)
1432                .with_provenance(GraphProvenance::new("tsift-memory", session_id)),
1433            );
1434        }
1435
1436        let mut node = GraphNode::new(&event_id, "memory_event", event.kind.as_str())
1437            .with_property("event_kind", event.kind.as_str())
1438            .with_property("source_ref", &event.source_ref)
1439            .with_property("token_estimate", event.token_estimate.to_string())
1440            .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref));
1441        if let Some(imported_from) = &event.imported_from {
1442            node = node.with_property("imported_from", imported_from);
1443        }
1444        if let Some(imported_id) = &event.imported_id {
1445            node = node.with_property("imported_id", imported_id);
1446        }
1447        projection.nodes.push(node);
1448
1449        if let Some(session_id) = &event.session_id {
1450            let session_node_id =
1451                format!("memsess:{}", blake3::hash(session_id.as_bytes()).to_hex());
1452            projection.edges.push(
1453                GraphEdge::new(session_node_id, event_id, "records_memory_event")
1454                    .with_provenance(GraphProvenance::new("tsift-memory", &event.source_ref)),
1455            );
1456        }
1457    }
1458
1459    projection
1460}
1461
1462#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1463pub struct MemoryQueryPlan {
1464    pub contract_version: String,
1465    pub query: String,
1466    pub limit: usize,
1467    pub max_tokens: usize,
1468    pub estimated_query_tokens: usize,
1469    pub output_contract: Vec<String>,
1470    pub next_commands: Vec<String>,
1471}
1472
1473pub fn plan_memory_query(query: &str, limit: usize, max_tokens: usize) -> Result<MemoryQueryPlan> {
1474    if query.trim().is_empty() {
1475        bail!("memory query must not be empty");
1476    }
1477    Ok(MemoryQueryPlan {
1478        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1479        query: query.to_string(),
1480        limit,
1481        max_tokens,
1482        estimated_query_tokens: estimate_tokens(query),
1483        output_contract: vec![
1484            "ranked memory_event ids".to_string(),
1485            "source_ref handles for expansion".to_string(),
1486            "graph node ids for neighborhood projection".to_string(),
1487            "token estimates for every returned packet".to_string(),
1488        ],
1489        next_commands: vec![
1490            "tsift memory status . --json".to_string(),
1491            "tsift graph-db --path . --json related '<query>'".to_string(),
1492        ],
1493    })
1494}
1495
1496#[cfg(test)]
1497mod tests {
1498    use super::*;
1499    use tempfile::TempDir;
1500
1501    #[test]
1502    fn handoff_plan_defers_oversized_events_before_model_call() {
1503        let small = MemoryEvent::new(MemoryEventKind::PromptTarget, "session.md", "short");
1504        let large = MemoryEvent::new(
1505            MemoryEventKind::ToolResultArtifact,
1506            "artifact:log",
1507            "x".repeat(10_000),
1508        );
1509        let plan = plan_capture_handoff(
1510            &[small, large],
1511            MemoryBudget {
1512                max_prompt_tokens: 1000,
1513                reserve_tokens: 100,
1514                max_event_tokens: 500,
1515            },
1516        );
1517        assert_eq!(plan.status, "split_required");
1518        assert_eq!(plan.included_events.len(), 1);
1519        assert_eq!(
1520            plan.deferred_events[0].reason,
1521            "event_exceeds_max_event_tokens"
1522        );
1523    }
1524
1525    #[test]
1526    fn memory_store_initializes_schema_and_dedupes_imported_events() {
1527        let dir = TempDir::new().unwrap();
1528        let db = dir.path().join("memory.db");
1529        let store = MemoryStore::open_or_create(&db).unwrap();
1530        let event = MemoryEvent::new(
1531            MemoryEventKind::ImportedObservation,
1532            "claude-mem:observations:1",
1533            "fact",
1534        )
1535        .with_session_id("session-a")
1536        .with_import("claude-mem", "observations:1");
1537
1538        let first = store.insert_event(&event).unwrap();
1539        let second = store.insert_event(&event).unwrap();
1540        assert_eq!(first, second);
1541        assert_eq!(store.event_count().unwrap(), 1);
1542    }
1543
1544    #[test]
1545    fn claude_mem_pending_messages_are_reported_but_not_imported() {
1546        let dir = TempDir::new().unwrap();
1547        let db = dir.path().join("claude-mem.db");
1548        let conn = Connection::open(&db).unwrap();
1549        conn.execute_batch(
1550            r#"
1551            CREATE TABLE pending_messages (
1552              id INTEGER PRIMARY KEY,
1553              session_db_id INTEGER NOT NULL,
1554              content_session_id TEXT NOT NULL,
1555              message_type TEXT NOT NULL CHECK(message_type IN ('observation', 'summarize')),
1556              tool_name TEXT,
1557              tool_input TEXT,
1558              tool_response TEXT,
1559              cwd TEXT,
1560              last_user_message TEXT,
1561              last_assistant_message TEXT,
1562              prompt_number INTEGER,
1563              status TEXT NOT NULL DEFAULT 'pending',
1564              retry_count INTEGER NOT NULL DEFAULT 0,
1565              created_at_epoch INTEGER NOT NULL,
1566              started_processing_at_epoch INTEGER,
1567              completed_at_epoch INTEGER,
1568              failed_at_epoch INTEGER
1569            );
1570            INSERT INTO pending_messages (
1571              id, session_db_id, content_session_id, message_type, tool_name,
1572              tool_input, tool_response, cwd, last_user_message,
1573              last_assistant_message, prompt_number, status, retry_count,
1574              created_at_epoch
1575            ) VALUES (
1576              1, 10, 'session-a', 'observation', 'bash',
1577              '{"cmd":"cat large.log"}', 'raw tool response', '/repo',
1578              'run tests', 'done', 7, 'pending', 0, 1700000000
1579            );
1580            "#,
1581        )
1582        .unwrap();
1583
1584        let plan = inspect_claude_mem(&db).unwrap();
1585        assert_eq!(plan.pending_messages.rows, 1);
1586        assert!(!plan.pending_messages.supported);
1587        assert_eq!(
1588            plan.pending_messages.unsupported_reason.as_deref(),
1589            Some(PENDING_MESSAGES_EXCLUSION_REASON)
1590        );
1591        assert!(
1592            plan.warnings
1593                .iter()
1594                .any(|warning| warning.contains("pending_messages"))
1595        );
1596
1597        let read_report = read_claude_mem_events(&db, Some(100)).unwrap();
1598        assert!(read_report.events.is_empty());
1599    }
1600
1601    #[test]
1602    fn claude_mem_import_all_reconciles_supported_table_counts() {
1603        let dir = TempDir::new().unwrap();
1604        let source = dir.path().join("claude-mem.db");
1605        let target = dir.path().join("memory.db");
1606        let conn = Connection::open(&source).unwrap();
1607        create_supported_claude_mem_fixture(&conn);
1608
1609        let dry_run = import_claude_mem(&source, &target, None, true).unwrap();
1610        assert!(dry_run.import_all);
1611        assert!(dry_run.reconciliation.complete);
1612        assert_eq!(dry_run.reconciliation.total_source_rows, 6);
1613        assert_eq!(dry_run.reconciliation.total_read_events, 6);
1614        assert_eq!(dry_run.reconciliation.total_skipped_source_rows, 0);
1615        assert_eq!(dry_run.reconciliation.observations.source_rows, 2);
1616        assert_eq!(dry_run.reconciliation.session_summaries.source_rows, 1);
1617        assert_eq!(dry_run.reconciliation.user_prompts.source_rows, 3);
1618
1619        let applied = import_claude_mem(&source, &target, None, false).unwrap();
1620        assert_eq!(applied.planned_events, 6);
1621        assert_eq!(applied.imported_events, 6);
1622        assert_eq!(applied.already_present_events, 0);
1623        assert_eq!(applied.event_ids_total, 6);
1624        assert_eq!(applied.event_ids.len(), 6);
1625        assert!(!applied.event_ids_truncated);
1626        assert_eq!(
1627            MemoryStore::open_or_create(&target)
1628                .unwrap()
1629                .event_count()
1630                .unwrap(),
1631            6
1632        );
1633
1634        let second_apply = import_claude_mem(&source, &target, None, false).unwrap();
1635        assert_eq!(second_apply.imported_events, 0);
1636        assert_eq!(second_apply.already_present_events, 6);
1637        assert_eq!(second_apply.reconciliation.total_already_present_events, 6);
1638    }
1639
1640    #[test]
1641    fn claude_mem_limited_import_reports_skipped_source_rows() {
1642        let dir = TempDir::new().unwrap();
1643        let source = dir.path().join("claude-mem.db");
1644        let conn = Connection::open(&source).unwrap();
1645        create_supported_claude_mem_fixture(&conn);
1646
1647        let read_report = read_claude_mem_events(&source, Some(1)).unwrap();
1648        assert!(!read_report.import_all);
1649        assert!(!read_report.reconciliation.complete);
1650        assert_eq!(read_report.reconciliation.total_source_rows, 6);
1651        assert_eq!(read_report.reconciliation.total_read_events, 3);
1652        assert_eq!(read_report.reconciliation.total_skipped_source_rows, 3);
1653        assert_eq!(
1654            read_report.reconciliation.observations.skipped_source_rows,
1655            1
1656        );
1657        assert_eq!(
1658            read_report
1659                .reconciliation
1660                .session_summaries
1661                .skipped_source_rows,
1662            0
1663        );
1664        assert_eq!(
1665            read_report.reconciliation.user_prompts.skipped_source_rows,
1666            2
1667        );
1668    }
1669
/// Imports one more user prompt than `MAX_IMPORT_EVENT_IDS` and checks that
/// the report still counts every event while the listed ids are capped at
/// the limit and flagged as truncated.
#[test]
fn claude_mem_import_caps_reported_event_ids() {
    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("claude-mem.db");
    let target = tmp.path().join("memory.db");
    let conn = Connection::open(&source).unwrap();

    // Only the `user_prompts` table is created for this source database.
    conn.execute_batch(
        r#"
            CREATE TABLE user_prompts (
              id INTEGER PRIMARY KEY,
              content_session_id TEXT NOT NULL,
              prompt_number INTEGER NOT NULL,
              prompt_text TEXT NOT NULL,
              created_at_epoch INTEGER NOT NULL
            );
            "#,
    )
    .unwrap();

    // One row past the cap forces truncation of the reported id list.
    let expected_total = MAX_IMPORT_EVENT_IDS + 1;
    for id in 1..=expected_total {
        let row_id = id as i64;
        conn.execute(
            "INSERT INTO user_prompts (id, content_session_id, prompt_number, prompt_text, created_at_epoch) VALUES (?1, 'session-a', ?2, ?3, ?4)",
            params![row_id, row_id, format!("prompt {id}"), 1700000000_i64 + row_id],
        )
        .unwrap();
    }

    let applied = import_claude_mem(&source, &target, None, false).unwrap();

    // Totals reflect every row; only the id listing is capped.
    assert_eq!(applied.planned_events, expected_total);
    assert_eq!(applied.event_ids_total, expected_total);
    assert_eq!(applied.event_ids.len(), MAX_IMPORT_EVENT_IDS);
    assert!(applied.event_ids_truncated);
    assert!(applied.reconciliation.complete);
}
1703
1704    fn create_supported_claude_mem_fixture(conn: &Connection) {
1705        conn.execute_batch(
1706            r#"
1707            CREATE TABLE observations (
1708              id INTEGER PRIMARY KEY,
1709              memory_session_id TEXT NOT NULL,
1710              project TEXT NOT NULL,
1711              type TEXT NOT NULL,
1712              title TEXT,
1713              subtitle TEXT,
1714              text TEXT,
1715              facts TEXT,
1716              narrative TEXT,
1717              concepts TEXT,
1718              prompt_number INTEGER,
1719              discovery_tokens INTEGER NOT NULL,
1720              created_at_epoch INTEGER NOT NULL,
1721              content_hash TEXT
1722            );
1723            CREATE TABLE session_summaries (
1724              id INTEGER PRIMARY KEY,
1725              memory_session_id TEXT NOT NULL,
1726              project TEXT NOT NULL,
1727              request TEXT,
1728              investigated TEXT,
1729              learned TEXT,
1730              completed TEXT,
1731              next_steps TEXT,
1732              notes TEXT,
1733              prompt_number INTEGER,
1734              discovery_tokens INTEGER NOT NULL,
1735              created_at_epoch INTEGER NOT NULL
1736            );
1737            CREATE TABLE user_prompts (
1738              id INTEGER PRIMARY KEY,
1739              content_session_id TEXT NOT NULL,
1740              prompt_number INTEGER NOT NULL,
1741              prompt_text TEXT NOT NULL,
1742              created_at_epoch INTEGER NOT NULL
1743            );
1744            INSERT INTO observations (
1745              id, memory_session_id, project, type, title, subtitle, text, facts,
1746              narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
1747              content_hash
1748            ) VALUES
1749              (1, 'session-a', 'agent-loop', 'fact', 'Title A', NULL, 'Text A', NULL, NULL, 'tsift', 1, 42, 1700000001, 'hash-a'),
1750              (2, 'session-b', 'agent-loop', 'fact', 'Title B', NULL, 'Text B', NULL, NULL, 'memory', 2, 43, 1700000002, 'hash-b');
1751            INSERT INTO session_summaries (
1752              id, memory_session_id, project, request, investigated, learned,
1753              completed, next_steps, notes, prompt_number, discovery_tokens,
1754              created_at_epoch
1755            ) VALUES (
1756              1, 'session-a', 'agent-loop', 'replace claude-mem', 'tables',
1757              'all rows', 'imported', 'refresh graph', NULL, 3, 44, 1700000003
1758            );
1759            INSERT INTO user_prompts (
1760              id, content_session_id, prompt_number, prompt_text, created_at_epoch
1761            ) VALUES
1762              (1, 'session-a', 1, 'first prompt', 1700000004),
1763              (2, 'session-a', 2, 'second prompt', 1700000005),
1764              (3, 'session-b', 1, 'third prompt', 1700000006);
1765            "#,
1766        )
1767        .unwrap();
1768    }
1769
/// Stores the events produced by `agent_doc_closeout_events` and checks that
/// `read_memory_events` returns the prompt-target event (text and session id
/// intact) and a closeout-proof event carrying the commit hash in its metadata.
#[test]
fn read_memory_events_round_trips_closeout_events() {
    let tmp = TempDir::new().unwrap();
    let db = tmp.path().join("memory.db");
    let store = MemoryStore::open_or_create(&db).unwrap();

    let closeout_events = agent_doc_closeout_events(
        Path::new("tasks/software/tsift.md"),
        "do [#tsiftmemhooks]",
        "wired closeout capture",
        Some("abc123"),
        "ok",
    );
    for event in closeout_events {
        store.insert_event(&event).unwrap();
    }

    let events = read_memory_events(&db, 10).unwrap();

    // The prompt target keeps its text and is keyed to the document path.
    let has_prompt_target = events.iter().any(|event| {
        event.kind == MemoryEventKind::PromptTarget
            && event.text == "do [#tsiftmemhooks]"
            && event.session_id.as_deref() == Some("tasks/software/tsift.md")
    });
    assert!(has_prompt_target);

    // The closeout proof records the commit hash under `commit_hash`.
    let has_closeout_proof = events.iter().any(|event| {
        event.kind == MemoryEventKind::CloseoutProof
            && event.metadata.get("commit_hash").map(String::as_str) == Some("abc123")
    });
    assert!(has_closeout_proof);
}
1796
/// Projects a single event that has a session id and checks that the result
/// has two nodes (one `memory_session`, one `memory_event`) and one edge.
#[test]
fn graph_projection_links_events_to_sessions() {
    let event = MemoryEvent::new(MemoryEventKind::ResponseSummary, "session.md", "done")
        .with_session_id("session-a");
    let projection = project_memory_events(&[event]);

    assert_eq!(projection.nodes.len(), 2);
    assert_eq!(projection.edges.len(), 1);

    // True when at least one projected node has the given kind.
    let has_kind = |kind: &str| projection.nodes.iter().any(|node| node.kind == kind);
    assert!(has_kind("memory_session"));
    assert!(has_kind("memory_event"));
}
1817
/// Feeds a 5,000-character tool result through the guard with a 400-token
/// event budget and checks that the handoff is blocked, a replacement is
/// offered, and the retry plan has several chunks that each fit the budget.
#[test]
fn budget_guard_fails_closed_with_retryable_chunks() {
    let input = MemoryBudgetGuardInput::new("tool.log", "tool_result", "x".repeat(5_000));
    let budget = MemoryBudget {
        max_prompt_tokens: 1000,
        reserve_tokens: 100,
        max_event_tokens: 400,
    };
    let report = guard_memory_handoff(input, budget);

    assert!(!report.allowed);
    assert_eq!(report.status, "blocked_split_required");
    assert!(report.replacement.is_some());
    assert!(report.retryable_chunk_plan.len() > 1);

    // No chunk in the plan may exceed the per-event token limit.
    let has_oversized_chunk = report
        .retryable_chunk_plan
        .iter()
        .any(|chunk| chunk.token_estimate > 400);
    assert!(!has_oversized_chunk);
}
1840
/// Feeds an oversized transcript through the guard and checks that the
/// replacement uses the transcript strategy and that its commands mention
/// `session-review` and `context-pack`.
#[test]
fn budget_guard_replaces_transcripts_with_session_review_commands() {
    let input = MemoryBudgetGuardInput::new("session.jsonl", "transcript", "x".repeat(5_000));
    let budget = MemoryBudget {
        max_prompt_tokens: 1000,
        reserve_tokens: 100,
        max_event_tokens: 400,
    };
    let report = guard_memory_handoff(input, budget);

    // The guard must have produced a replacement for the raw transcript.
    let replacement = report.replacement.unwrap();
    assert_eq!(
        replacement.strategy,
        "replace_raw_transcript_with_session_review_or_context_pack_handle"
    );

    let review_command = &replacement.session_review_command;
    assert!(review_command.contains("session-review"));

    let context_command = &replacement.context_command;
    assert!(context_command.contains("context-pack"));
}
1863}