Skip to main content

tsift_memory/
lib.rs

1use anyhow::{Context, Result, bail};
2use rusqlite::{Connection, OpenFlags, OptionalExtension, params, params_from_iter};
3use serde::{Deserialize, Serialize};
4use std::collections::BTreeMap;
5use std::path::{Path, PathBuf};
6use tsift_core::{GraphEdge, GraphFreshness, GraphNode, GraphProjection, GraphProvenance};
7
/// Version tag written into `MemoryHandoffPlan::contract_version`.
pub const MEMORY_CONTRACT_VERSION: &str = "tsift-memory-v1";
/// Schema version number of the memory database.
// NOTE(review): the schema SQL in this file records version 1 in
// `memory_schema_versions`; presumably the two must be bumped together — confirm.
pub const MEMORY_SCHEMA_VERSION: i64 = 1;
/// Default for `MemoryBudget::max_prompt_tokens`.
pub const DEFAULT_MAX_PROMPT_TOKENS: usize = 4096;
/// Default for `MemoryBudget::reserve_tokens`.
pub const DEFAULT_RESERVE_TOKENS: usize = 512;
/// Default for `MemoryBudget::max_event_tokens`.
pub const DEFAULT_MAX_EVENT_TOKENS: usize = 1536;
/// Version tag written into `MemoryBudgetGuardReport::contract_version`.
pub const MEMORY_BUDGET_GUARD_CONTRACT_VERSION: &str = "tsift-memory-budget-guard-v1";

// NOTE(review): not referenced in the visible part of this file; presumably a
// cap on how many event ids an import report lists — confirm at the use site.
const MAX_IMPORT_EVENT_IDS: usize = 100;
16
/// SQLite DDL batch for the memory database.
///
/// Every statement uses `IF NOT EXISTS` / `INSERT OR IGNORE`, so the batch can
/// be re-run against an already-initialized database. It enables foreign keys
/// for the executing connection, records schema version 1, and creates the
/// event, session-summary, artifact, tool-span, embedding, graph-link and
/// import-run tables plus their indexes. The partial unique index on
/// `(imported_from, imported_id)` keeps a given imported row from being stored
/// twice.
const MEMORY_SCHEMA_SQL: &str = r#"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS memory_schema_versions (
  version INTEGER PRIMARY KEY,
  applied_at_unix INTEGER NOT NULL
);

INSERT OR IGNORE INTO memory_schema_versions(version, applied_at_unix)
VALUES (1, strftime('%s','now'));

CREATE TABLE IF NOT EXISTS memory_events (
  id TEXT PRIMARY KEY,
  kind TEXT NOT NULL,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  text TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  imported_from TEXT,
  imported_id TEXT,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE INDEX IF NOT EXISTS idx_memory_events_kind
ON memory_events(kind);

CREATE INDEX IF NOT EXISTS idx_memory_events_session
ON memory_events(session_id);

CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_events_import_source
ON memory_events(imported_from, imported_id)
WHERE imported_from IS NOT NULL AND imported_id IS NOT NULL;

CREATE TABLE IF NOT EXISTS memory_session_summaries (
  id TEXT PRIMARY KEY,
  session_id TEXT NOT NULL,
  summary TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_artifacts (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  artifact_kind TEXT NOT NULL,
  path TEXT,
  handle TEXT,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  token_estimate INTEGER NOT NULL DEFAULT 0,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_tool_spans (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  tool_name TEXT NOT NULL,
  input_artifact_id TEXT,
  output_artifact_id TEXT,
  status TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  started_at_unix INTEGER,
  completed_at_unix INTEGER,
  FOREIGN KEY(input_artifact_id) REFERENCES memory_artifacts(id),
  FOREIGN KEY(output_artifact_id) REFERENCES memory_artifacts(id)
);

CREATE TABLE IF NOT EXISTS memory_embeddings (
  id TEXT PRIMARY KEY,
  owner_kind TEXT NOT NULL,
  owner_id TEXT NOT NULL,
  provider TEXT NOT NULL,
  model TEXT NOT NULL,
  vector_ref TEXT NOT NULL,
  dimensions INTEGER,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_graph_links (
  id TEXT PRIMARY KEY,
  memory_event_id TEXT NOT NULL,
  graph_node_id TEXT NOT NULL,
  graph_edge_id TEXT,
  link_kind TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  FOREIGN KEY(memory_event_id) REFERENCES memory_events(id)
);

CREATE TABLE IF NOT EXISTS memory_import_runs (
  id TEXT PRIMARY KEY,
  source TEXT NOT NULL,
  source_ref TEXT NOT NULL,
  status TEXT NOT NULL,
  imported_events INTEGER NOT NULL DEFAULT 0,
  warnings_json TEXT NOT NULL DEFAULT '[]',
  started_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  completed_at_unix INTEGER
);
"#;
122
/// Returns the SQL batch that `MemoryStore::open_or_create` executes to
/// initialize a memory database.
pub fn memory_schema_sql() -> &'static str {
    MEMORY_SCHEMA_SQL
}
126
/// Returns the default memory database location for a project:
/// `<project_root>/.tsift/memory.db`.
pub fn default_memory_db_path(project_root: &Path) -> PathBuf {
    let mut db_path = project_root.to_path_buf();
    db_path.push(".tsift");
    db_path.push("memory.db");
    db_path
}
130
/// Returns `$HOME/.claude-mem/claude-mem.db`, or `None` when the `HOME`
/// environment variable is not set.
pub fn default_claude_mem_db_path() -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    let mut db_path = PathBuf::from(home);
    db_path.push(".claude-mem");
    db_path.push("claude-mem.db");
    Some(db_path)
}
136
/// Estimates the token count of `text` as one token per four bytes of the
/// whitespace-trimmed input, rounded up. Empty or all-whitespace input is
/// zero tokens.
pub fn estimate_tokens(text: &str) -> usize {
    match text.trim().len() {
        0 => 0,
        byte_len => byte_len.div_ceil(4),
    }
}
141
/// Category tag of a [`MemoryEvent`].
///
/// Serialized by serde as the snake_case variant name; `as_str` / `parse`
/// use the same spellings for the `kind` column of `memory_events`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MemoryEventKind {
    PromptTarget,
    ToolCall,
    ToolResultArtifact,
    ResponseSummary,
    CloseoutProof,
    SessionCheck,
    // NOTE(review): the `Imported*` kinds presumably tag rows brought in from a
    // claude-mem database; the importer is outside the visible code — confirm.
    ImportedObservation,
    ImportedSessionSummary,
    ImportedUserPrompt,
}
155
156impl MemoryEventKind {
157    pub fn as_str(self) -> &'static str {
158        match self {
159            Self::PromptTarget => "prompt_target",
160            Self::ToolCall => "tool_call",
161            Self::ToolResultArtifact => "tool_result_artifact",
162            Self::ResponseSummary => "response_summary",
163            Self::CloseoutProof => "closeout_proof",
164            Self::SessionCheck => "session_check",
165            Self::ImportedObservation => "imported_observation",
166            Self::ImportedSessionSummary => "imported_session_summary",
167            Self::ImportedUserPrompt => "imported_user_prompt",
168        }
169    }
170
171    pub fn parse(raw: &str) -> Result<Self> {
172        match raw {
173            "prompt_target" => Ok(Self::PromptTarget),
174            "tool_call" => Ok(Self::ToolCall),
175            "tool_result_artifact" => Ok(Self::ToolResultArtifact),
176            "response_summary" => Ok(Self::ResponseSummary),
177            "closeout_proof" => Ok(Self::CloseoutProof),
178            "session_check" => Ok(Self::SessionCheck),
179            "imported_observation" => Ok(Self::ImportedObservation),
180            "imported_session_summary" => Ok(Self::ImportedSessionSummary),
181            "imported_user_prompt" => Ok(Self::ImportedUserPrompt),
182            other => bail!("unsupported memory event kind `{other}`"),
183        }
184    }
185}
186
/// One captured memory record, as stored in the `memory_events` table.
///
/// Optional fields are omitted from serialized output when unset.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryEvent {
    /// Category of the event.
    pub kind: MemoryEventKind,
    /// Session this event belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    /// Reference to where the event came from.
    pub source_ref: String,
    /// Event payload text.
    pub text: String,
    /// Free-form string key/value annotations. Not part of `stable_id`.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub metadata: BTreeMap<String, String>,
    /// Observation timestamp. NOTE(review): presumably Unix seconds — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub observed_at_unix: Option<i64>,
    /// Token cost of `text`; `MemoryEvent::new` fills it via `estimate_tokens`.
    pub token_estimate: usize,
    /// Name of the external source this event was imported from, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imported_from: Option<String>,
    /// Identifier of the record within `imported_from`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imported_id: Option<String>,
}
204
205impl MemoryEvent {
206    pub fn new(
207        kind: MemoryEventKind,
208        source_ref: impl Into<String>,
209        text: impl Into<String>,
210    ) -> Self {
211        let text = text.into();
212        Self {
213            kind,
214            session_id: None,
215            source_ref: source_ref.into(),
216            token_estimate: estimate_tokens(&text),
217            text,
218            metadata: BTreeMap::new(),
219            observed_at_unix: None,
220            imported_from: None,
221            imported_id: None,
222        }
223    }
224
225    pub fn with_session_id(mut self, session_id: impl Into<String>) -> Self {
226        self.session_id = Some(session_id.into());
227        self
228    }
229
230    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
231        self.metadata.insert(key.into(), value.into());
232        self
233    }
234
235    pub fn with_observed_at_unix(mut self, observed_at_unix: i64) -> Self {
236        self.observed_at_unix = Some(observed_at_unix);
237        self
238    }
239
240    pub fn with_import(mut self, source: impl Into<String>, id: impl Into<String>) -> Self {
241        self.imported_from = Some(source.into());
242        self.imported_id = Some(id.into());
243        self
244    }
245
246    pub fn stable_id(&self) -> String {
247        let raw = serde_json::json!([
248            self.kind.as_str(),
249            self.session_id,
250            self.source_ref,
251            self.text,
252            self.observed_at_unix,
253            self.imported_from,
254            self.imported_id
255        ])
256        .to_string();
257        format!("memevt:{}", blake3::hash(raw.as_bytes()).to_hex())
258    }
259}
260
261#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
262pub struct MemoryBudget {
263    pub max_prompt_tokens: usize,
264    pub reserve_tokens: usize,
265    pub max_event_tokens: usize,
266}
267
268impl Default for MemoryBudget {
269    fn default() -> Self {
270        Self {
271            max_prompt_tokens: DEFAULT_MAX_PROMPT_TOKENS,
272            reserve_tokens: DEFAULT_RESERVE_TOKENS,
273            max_event_tokens: DEFAULT_MAX_EVENT_TOKENS,
274        }
275    }
276}
277
278impl MemoryBudget {
279    pub fn new(max_prompt_tokens: usize) -> Self {
280        Self {
281            max_prompt_tokens,
282            ..Self::default()
283        }
284    }
285
286    pub fn available_tokens(self) -> usize {
287        self.max_prompt_tokens.saturating_sub(self.reserve_tokens)
288    }
289}
290
/// Summary of an event that `plan_capture_handoff` admitted into the handoff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BudgetedMemoryEvent {
    /// The event's `stable_id()`.
    pub id: String,
    /// The event kind's `as_str()` name.
    pub kind: String,
    /// Copied from the event.
    pub source_ref: String,
    /// Copied from the event.
    pub token_estimate: usize,
}
298
/// Summary of an event that `plan_capture_handoff` left out of the handoff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DeferredMemoryEvent {
    /// The event's `stable_id()`.
    pub id: String,
    /// The event kind's `as_str()` name.
    pub kind: String,
    /// Copied from the event.
    pub source_ref: String,
    /// Copied from the event.
    pub token_estimate: usize,
    /// `"event_exceeds_max_event_tokens"` or `"handoff_budget_exhausted"`.
    pub reason: String,
    /// For oversized events, `token_estimate` divided by the per-event limit,
    /// rounded up; `1` for events deferred only because the budget ran out.
    pub recommended_chunks: usize,
}
308
/// Result of `plan_capture_handoff`: which events fit the budget and which
/// were deferred.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryHandoffPlan {
    /// Always `MEMORY_CONTRACT_VERSION`.
    pub contract_version: String,
    /// `"ready"` when nothing was deferred, otherwise `"split_required"`.
    pub status: String,
    /// Copied from the budget.
    pub max_prompt_tokens: usize,
    /// Copied from the budget.
    pub reserve_tokens: usize,
    /// The budget's `available_tokens()`.
    pub available_tokens: usize,
    /// Sum of `token_estimate` over `included_events`.
    pub estimated_included_tokens: usize,
    /// Events admitted, in input order.
    pub included_events: Vec<BudgetedMemoryEvent>,
    /// Events left out, in input order.
    pub deferred_events: Vec<DeferredMemoryEvent>,
    /// Suggested follow-up CLI command templates.
    pub next_commands: Vec<String>,
}
321
322pub fn plan_capture_handoff(events: &[MemoryEvent], budget: MemoryBudget) -> MemoryHandoffPlan {
323    let mut used = 0usize;
324    let mut included_events = Vec::new();
325    let mut deferred_events = Vec::new();
326    let available = budget.available_tokens();
327
328    for event in events {
329        let id = event.stable_id();
330        if event.token_estimate > budget.max_event_tokens {
331            deferred_events.push(DeferredMemoryEvent {
332                id,
333                kind: event.kind.as_str().to_string(),
334                source_ref: event.source_ref.clone(),
335                token_estimate: event.token_estimate,
336                reason: "event_exceeds_max_event_tokens".to_string(),
337                recommended_chunks: event
338                    .token_estimate
339                    .div_ceil(budget.max_event_tokens.max(1)),
340            });
341            continue;
342        }
343
344        if used + event.token_estimate <= available {
345            used += event.token_estimate;
346            included_events.push(BudgetedMemoryEvent {
347                id,
348                kind: event.kind.as_str().to_string(),
349                source_ref: event.source_ref.clone(),
350                token_estimate: event.token_estimate,
351            });
352        } else {
353            deferred_events.push(DeferredMemoryEvent {
354                id,
355                kind: event.kind.as_str().to_string(),
356                source_ref: event.source_ref.clone(),
357                token_estimate: event.token_estimate,
358                reason: "handoff_budget_exhausted".to_string(),
359                recommended_chunks: 1,
360            });
361        }
362    }
363
364    MemoryHandoffPlan {
365        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
366        status: if deferred_events.is_empty() {
367            "ready".to_string()
368        } else {
369            "split_required".to_string()
370        },
371        max_prompt_tokens: budget.max_prompt_tokens,
372        reserve_tokens: budget.reserve_tokens,
373        available_tokens: available,
374        estimated_included_tokens: used,
375        included_events,
376        deferred_events,
377        next_commands: vec![
378            "tsift memory handoff-plan '<event text>' --budget-tokens <n> --json".to_string(),
379            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
380        ],
381    }
382}
383
384#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
385pub struct MemoryBudgetGuardInput {
386    pub source_ref: String,
387    pub payload_kind: String,
388    pub text: String,
389}
390
391impl MemoryBudgetGuardInput {
392    pub fn new(
393        source_ref: impl Into<String>,
394        payload_kind: impl Into<String>,
395        text: impl Into<String>,
396    ) -> Self {
397        Self {
398            source_ref: source_ref.into(),
399            payload_kind: payload_kind.into(),
400            text: text.into(),
401        }
402    }
403}
404
/// What to hand off instead of a payload that the budget guard blocked.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryBudgetReplacement {
    /// Name of the replacement strategy (transcript vs. tool/log payload).
    pub strategy: String,
    /// `artifact:<blake3 hex of the source ref>`.
    pub artifact_ref: String,
    /// CLI command that produces a digest of the payload.
    pub digest_command: String,
    /// CLI command that builds a context pack from the source.
    pub context_command: String,
    /// CLI command that runs a session review on the source.
    pub session_review_command: String,
}
413
/// One slice of an oversized payload that can be retried on its own.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryRetryChunk {
    /// 1-based position of the chunk.
    pub index: usize,
    /// `<source_ref>#chunk-<index>`.
    pub source_ref: String,
    /// Start byte offset into the payload text (inclusive).
    pub byte_start: usize,
    /// End byte offset into the payload text (exclusive).
    pub byte_end: usize,
    /// `estimate_tokens` of the chunk's text.
    pub token_estimate: usize,
    /// CLI command template that re-runs the guard on this chunk.
    pub retry_command: String,
}
423
/// Verdict of `guard_memory_handoff` for one payload.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryBudgetGuardReport {
    /// Always `MEMORY_BUDGET_GUARD_CONTRACT_VERSION`.
    pub contract_version: String,
    /// `"ready"` when allowed, otherwise `"blocked_split_required"`.
    pub status: String,
    /// True when the payload fits both the available budget and the chunk cap.
    pub allowed: bool,
    /// Copied from the input.
    pub source_ref: String,
    /// Copied from the input.
    pub payload_kind: String,
    /// Length of the untrimmed payload text in bytes.
    pub byte_count: usize,
    /// `estimate_tokens` of the payload (computed on trimmed text).
    pub estimated_tokens: usize,
    /// Copied from the budget.
    pub max_prompt_tokens: usize,
    /// Copied from the budget.
    pub reserve_tokens: usize,
    /// The budget's `available_tokens()`.
    pub available_tokens: usize,
    /// The budget's `max_event_tokens`.
    pub max_chunk_tokens: usize,
    /// Suggested substitute; present only when the payload was blocked.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub replacement: Option<MemoryBudgetReplacement>,
    /// Chunks to retry individually; empty when the payload was allowed.
    pub retryable_chunk_plan: Vec<MemoryRetryChunk>,
    /// Machine-readable reasons the payload exceeded a limit.
    pub warnings: Vec<String>,
    /// Suggested follow-up CLI commands.
    pub next_commands: Vec<String>,
}
443
444pub fn guard_memory_handoff(
445    input: MemoryBudgetGuardInput,
446    budget: MemoryBudget,
447) -> MemoryBudgetGuardReport {
448    let estimated_tokens = estimate_tokens(&input.text);
449    let available_tokens = budget.available_tokens();
450    let allowed =
451        estimated_tokens <= available_tokens && estimated_tokens <= budget.max_event_tokens;
452    let replacement = (!allowed).then(|| replacement_for_budget_guard(&input));
453    let retryable_chunk_plan = if allowed {
454        Vec::new()
455    } else {
456        retry_chunks_for_budget_guard(&input, budget.max_event_tokens.min(available_tokens).max(1))
457    };
458    let mut warnings = Vec::new();
459    if estimated_tokens > available_tokens {
460        warnings.push("payload_exceeds_available_prompt_budget".to_string());
461    }
462    if estimated_tokens > budget.max_event_tokens {
463        warnings.push("payload_exceeds_max_chunk_tokens".to_string());
464    }
465
466    MemoryBudgetGuardReport {
467        contract_version: MEMORY_BUDGET_GUARD_CONTRACT_VERSION.to_string(),
468        status: if allowed {
469            "ready".to_string()
470        } else {
471            "blocked_split_required".to_string()
472        },
473        allowed,
474        source_ref: input.source_ref.clone(),
475        payload_kind: input.payload_kind.clone(),
476        byte_count: input.text.len(),
477        estimated_tokens,
478        max_prompt_tokens: budget.max_prompt_tokens,
479        reserve_tokens: budget.reserve_tokens,
480        available_tokens,
481        max_chunk_tokens: budget.max_event_tokens,
482        replacement,
483        retryable_chunk_plan,
484        warnings,
485        next_commands: vec![
486            format!(
487                "tsift memory budget-guard --file {} --json",
488                shell_quote(&input.source_ref)
489            ),
490            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
491            "tsift --envelope session-review <session.md> --next-context --budget normal"
492                .to_string(),
493        ],
494    }
495}
496
497fn replacement_for_budget_guard(input: &MemoryBudgetGuardInput) -> MemoryBudgetReplacement {
498    let quoted_ref = shell_quote(&input.source_ref);
499    let is_transcript = matches!(
500        input.payload_kind.as_str(),
501        "transcript" | "session" | "session_transcript" | "agent_doc"
502    ) || input.source_ref.ends_with(".jsonl")
503        || input.source_ref.ends_with(".md");
504    let strategy = if is_transcript {
505        "replace_raw_transcript_with_session_review_or_context_pack_handle"
506    } else {
507        "replace_raw_tool_or_log_payload_with_digest_artifact_handle"
508    };
509    MemoryBudgetReplacement {
510        strategy: strategy.to_string(),
511        artifact_ref: format!(
512            "artifact:{}",
513            blake3::hash(input.source_ref.as_bytes()).to_hex()
514        ),
515        digest_command: if is_transcript {
516            format!("tsift --envelope session-review {quoted_ref} --next-context --budget normal")
517        } else {
518            format!("tsift log-digest --path . --input {quoted_ref} --json")
519        },
520        context_command: format!("tsift --envelope context-pack {quoted_ref} --budget normal"),
521        session_review_command: format!(
522            "tsift --envelope session-review {quoted_ref} --next-context --budget normal"
523        ),
524    }
525}
526
527fn retry_chunks_for_budget_guard(
528    input: &MemoryBudgetGuardInput,
529    max_chunk_tokens: usize,
530) -> Vec<MemoryRetryChunk> {
531    let byte_budget = max_chunk_tokens.saturating_mul(4).max(1);
532    let mut chunks = Vec::new();
533    let mut start = 0usize;
534    while start < input.text.len() {
535        let mut end = (start + byte_budget).min(input.text.len());
536        while end > start && !input.text.is_char_boundary(end) {
537            end -= 1;
538        }
539        if end == start {
540            if let Some((offset, ch)) = input.text[start..].char_indices().next() {
541                end = start + offset + ch.len_utf8();
542            } else {
543                break;
544            }
545        }
546        let token_estimate = estimate_tokens(&input.text[start..end]);
547        let index = chunks.len() + 1;
548        chunks.push(MemoryRetryChunk {
549            index,
550            source_ref: format!("{}#chunk-{index}", input.source_ref),
551            byte_start: start,
552            byte_end: end,
553            token_estimate,
554            retry_command: retry_chunk_command(input, index, start, end),
555        });
556        start = end;
557    }
558    chunks
559}
560
561fn retry_chunk_command(
562    input: &MemoryBudgetGuardInput,
563    index: usize,
564    byte_start: usize,
565    byte_end: usize,
566) -> String {
567    let chunk_ref = format!("{}#chunk-{index}", input.source_ref);
568    if input.source_ref == "inline" {
569        format!(
570            "tsift memory budget-guard --text '<chunk {index} payload>' --source-ref {} --budget-tokens <n> --json",
571            shell_quote(&chunk_ref)
572        )
573    } else {
574        format!(
575            "tsift memory budget-guard --file {} --byte-start {byte_start} --byte-end {byte_end} --source-ref {} --budget-tokens <n> --json",
576            shell_quote(&input.source_ref),
577            shell_quote(&chunk_ref)
578        )
579    }
580}
581
/// Quotes `s` for use as a single argument in a POSIX shell command line.
///
/// Non-empty strings made only of alphanumerics, `_`, `-`, `.` and `/` are
/// returned unchanged. Everything else is wrapped in single quotes, with each
/// embedded `'` rewritten as `'\''`. The empty string becomes `''` so that it
/// survives as an (empty) argument instead of vanishing from the command.
fn shell_quote(s: &str) -> String {
    let is_plain = !s.is_empty()
        && s.chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '/'));
    if is_plain {
        s.to_string()
    } else {
        format!("'{}'", s.replace('\'', "'\\''"))
    }
}
591
/// Description of one capture hook, as listed by `agent_doc_hook_contract`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryHookSpec {
    /// Hook name.
    pub name: String,
    /// `as_str()` name of the `MemoryEventKind` the hook emits.
    pub event_kind: String,
    /// Names of the fields the hook requires.
    pub required_fields: Vec<String>,
    /// Prose description of how the hook keeps its payload within budget.
    pub budget_behavior: String,
}
599
600pub fn agent_doc_hook_contract() -> Vec<MemoryHookSpec> {
601    vec![
602        MemoryHookSpec {
603            name: "prompt_target".to_string(),
604            event_kind: MemoryEventKind::PromptTarget.as_str().to_string(),
605            required_fields: vec!["session_path".to_string(), "prompt_target".to_string()],
606            budget_behavior:
607                "capture text plus graph/source handles; never inline raw transcript blobs"
608                    .to_string(),
609        },
610        MemoryHookSpec {
611            name: "tool_result_artifact".to_string(),
612            event_kind: MemoryEventKind::ToolResultArtifact.as_str().to_string(),
613            required_fields: vec!["tool_name".to_string(), "artifact_handle".to_string()],
614            budget_behavior: "store artifact handle and digest, not full raw output".to_string(),
615        },
616        MemoryHookSpec {
617            name: "response_summary".to_string(),
618            event_kind: MemoryEventKind::ResponseSummary.as_str().to_string(),
619            required_fields: vec!["response_topic".to_string(), "summary".to_string()],
620            budget_behavior: "summaries are capped before write and linked to source handles"
621                .to_string(),
622        },
623        MemoryHookSpec {
624            name: "closeout_proof".to_string(),
625            event_kind: MemoryEventKind::CloseoutProof.as_str().to_string(),
626            required_fields: vec!["commit_hash".to_string(), "changed_paths".to_string()],
627            budget_behavior: "proof records are structured metadata with compact prose".to_string(),
628        },
629        MemoryHookSpec {
630            name: "session_check".to_string(),
631            event_kind: MemoryEventKind::SessionCheck.as_str().to_string(),
632            required_fields: vec!["status".to_string(), "session_path".to_string()],
633            budget_behavior: "persist pass/fail state and recovery commands".to_string(),
634        },
635    ]
636}
637
638pub fn agent_doc_closeout_events(
639    session_path: &Path,
640    prompt_target: &str,
641    response_summary: &str,
642    commit_hash: Option<&str>,
643    session_check_status: &str,
644) -> Vec<MemoryEvent> {
645    let session_id = session_path.display().to_string();
646    let mut events = vec![
647        MemoryEvent::new(
648            MemoryEventKind::PromptTarget,
649            session_path.display().to_string(),
650            prompt_target,
651        )
652        .with_session_id(session_id.clone()),
653        MemoryEvent::new(
654            MemoryEventKind::ResponseSummary,
655            session_path.display().to_string(),
656            response_summary,
657        )
658        .with_session_id(session_id.clone()),
659        MemoryEvent::new(
660            MemoryEventKind::SessionCheck,
661            session_path.display().to_string(),
662            session_check_status,
663        )
664        .with_session_id(session_id.clone())
665        .with_metadata("status", session_check_status),
666    ];
667
668    if let Some(commit_hash) = commit_hash {
669        events.push(
670            MemoryEvent::new(
671                MemoryEventKind::CloseoutProof,
672                session_path.display().to_string(),
673                commit_hash,
674            )
675            .with_session_id(session_id)
676            .with_metadata("commit_hash", commit_hash),
677        );
678    }
679
680    events
681}
682
/// Outcome of inserting one event into the store.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryInsertResult {
    /// The event's `stable_id()`.
    pub id: String,
    /// True when the `INSERT OR IGNORE` actually added a row.
    pub inserted: bool,
}
688
689pub struct MemoryStore {
690    conn: Connection,
691}
692
693impl MemoryStore {
694    pub fn open_or_create(path: &Path) -> Result<Self> {
695        if let Some(parent) = path.parent() {
696            std::fs::create_dir_all(parent)
697                .with_context(|| format!("create {}", parent.display()))?;
698        }
699        let conn = Connection::open(path).with_context(|| format!("open {}", path.display()))?;
700        conn.execute_batch(MEMORY_SCHEMA_SQL)
701            .with_context(|| format!("initialize {}", path.display()))?;
702        Ok(Self { conn })
703    }
704
705    pub fn insert_event(&self, event: &MemoryEvent) -> Result<String> {
706        Ok(self.insert_event_result(event)?.id)
707    }
708
709    pub fn insert_event_result(&self, event: &MemoryEvent) -> Result<MemoryInsertResult> {
710        insert_event_on(&self.conn, event)
711    }
712
713    pub fn insert_events(&mut self, events: &[MemoryEvent]) -> Result<Vec<MemoryInsertResult>> {
714        let tx = self.conn.transaction()?;
715        let mut results = Vec::with_capacity(events.len());
716        for event in events {
717            results.push(insert_event_on(&tx, event)?);
718        }
719        tx.commit()?;
720        Ok(results)
721    }
722
723    pub fn event_count(&self) -> Result<usize> {
724        let count: i64 = self
725            .conn
726            .query_row("SELECT COUNT(*) FROM memory_events", [], |row| row.get(0))?;
727        Ok(count as usize)
728    }
729}
730
731fn insert_event_on(conn: &Connection, event: &MemoryEvent) -> Result<MemoryInsertResult> {
732    let id = event.stable_id();
733    let metadata_json = serde_json::to_string(&event.metadata)?;
734    let changed = conn.execute(
735        r#"
736            INSERT OR IGNORE INTO memory_events(
737              id, kind, session_id, source_ref, text, metadata_json,
738              observed_at_unix, token_estimate, imported_from, imported_id
739            )
740            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
741            "#,
742        params![
743            &id,
744            event.kind.as_str(),
745            event.session_id.as_deref(),
746            &event.source_ref,
747            &event.text,
748            &metadata_json,
749            event.observed_at_unix,
750            event.token_estimate as i64,
751            event.imported_from.as_deref(),
752            event.imported_id.as_deref()
753        ],
754    )?;
755    Ok(MemoryInsertResult {
756        id,
757        inserted: changed > 0,
758    })
759}
760
761pub fn read_memory_events(memory_db_path: &Path, limit: usize) -> Result<Vec<MemoryEvent>> {
762    if !memory_db_path.exists() {
763        return Ok(Vec::new());
764    }
765    let conn = Connection::open_with_flags(
766        memory_db_path,
767        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
768    )
769    .with_context(|| format!("open memory db {}", memory_db_path.display()))?;
770    let mut stmt = conn.prepare(
771        r#"
772        SELECT kind, session_id, source_ref, text, metadata_json,
773               observed_at_unix, token_estimate, imported_from, imported_id
774        FROM memory_events
775        ORDER BY COALESCE(observed_at_unix, created_at_unix), id
776        LIMIT ?1
777        "#,
778    )?;
779    let mut rows = stmt.query([limit as i64])?;
780    let mut events = Vec::new();
781    while let Some(row) = rows.next()? {
782        let kind_raw: String = row.get(0)?;
783        let session_id: Option<String> = row.get(1)?;
784        let source_ref: String = row.get(2)?;
785        let text: String = row.get(3)?;
786        let metadata_json: String = row.get(4)?;
787        let observed_at_unix: Option<i64> = row.get(5)?;
788        let token_estimate: i64 = row.get(6)?;
789        let imported_from: Option<String> = row.get(7)?;
790        let imported_id: Option<String> = row.get(8)?;
791        let metadata = serde_json::from_str::<BTreeMap<String, String>>(&metadata_json)
792            .with_context(|| format!("parse memory metadata for {source_ref}"))?;
793        let mut event = MemoryEvent::new(MemoryEventKind::parse(&kind_raw)?, source_ref, text);
794        event.session_id = session_id;
795        event.metadata = metadata;
796        event.observed_at_unix = observed_at_unix;
797        event.token_estimate = token_estimate.max(0) as usize;
798        event.imported_from = imported_from;
799        event.imported_id = imported_id;
800        events.push(event);
801    }
802    Ok(events)
803}
804
/// Inspection result for one table of a claude-mem database.
// NOTE(review): populated by `inspect_table`, whose body is outside the
// visible code; field meanings below are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ClaudeMemTablePlan {
    /// Table name.
    pub table: String,
    /// Whether the table is eligible for import.
    pub supported: bool,
    /// Presumably the table's row count.
    pub rows: usize,
    /// Presumably the table's column names.
    pub columns: Vec<String>,
    /// Why the table is not imported, when a reason applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unsupported_reason: Option<String>,
}
814
/// Result of `inspect_claude_mem`: what a claude-mem database contains and
/// what an import would cover.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ClaudeMemImportPlan {
    /// Displayed path of the inspected database.
    pub db_path: String,
    /// Whether the database file exists.
    pub exists: bool,
    /// Whether the database could be opened read-only.
    pub readable: bool,
    /// Displayed path of the `chroma` directory expected next to the database.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chroma_path: Option<String>,
    /// Whether that `chroma` directory exists.
    pub chroma_present: bool,
    /// Inspection of the `observations` table.
    pub observations: ClaudeMemTablePlan,
    /// Inspection of the `session_summaries` table.
    pub session_summaries: ClaudeMemTablePlan,
    /// Inspection of the `user_prompts` table.
    pub user_prompts: ClaudeMemTablePlan,
    /// Inspection of the `pending_messages` table, which is never imported.
    pub pending_messages: ClaudeMemTablePlan,
    /// Human-readable warnings gathered during inspection.
    pub warnings: Vec<String>,
    /// Suggested follow-up CLI commands.
    pub next_commands: Vec<String>,
}
830
/// Per-table accounting of a claude-mem import.
// NOTE(review): the code that fills this struct is outside the visible part
// of the file; the counters' exact definitions should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ClaudeMemTableReconciliation {
    pub table: String,
    pub source_rows: usize,
    pub read_events: usize,
    pub planned_events: usize,
    pub imported_events: usize,
    pub already_present_events: usize,
    pub skipped_source_rows: usize,
    /// Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub limit_per_table: Option<usize>,
    pub complete: bool,
}
844
/// Accounting of a claude-mem import across the three imported tables, with
/// `total_*` counters alongside the per-table breakdowns.
// NOTE(review): the producer is outside the visible code; presumably each
// `total_*` is the sum of the matching per-table field — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ClaudeMemImportReconciliation {
    pub observations: ClaudeMemTableReconciliation,
    pub session_summaries: ClaudeMemTableReconciliation,
    pub user_prompts: ClaudeMemTableReconciliation,
    pub total_source_rows: usize,
    pub total_read_events: usize,
    pub total_planned_events: usize,
    pub total_imported_events: usize,
    pub total_already_present_events: usize,
    pub total_skipped_source_rows: usize,
    pub complete: bool,
}
858
859fn empty_table_plan(table: &str) -> ClaudeMemTablePlan {
860    ClaudeMemTablePlan {
861        table: table.to_string(),
862        supported: false,
863        rows: 0,
864        columns: Vec::new(),
865        unsupported_reason: None,
866    }
867}
868
/// Explanation attached to the `pending_messages` table plan, and repeated as
/// a warning when that table has rows.
const PENDING_MESSAGES_EXCLUSION_REASON: &str = "pending_messages is intentionally excluded from claude-mem import because it contains transient queue state and raw tool payload fields, not durable replacement memory";
870
871pub fn inspect_claude_mem(db_path: &Path) -> Result<ClaudeMemImportPlan> {
872    let chroma_path = db_path.parent().map(|parent| parent.join("chroma"));
873    let chroma_present = chroma_path.as_ref().is_some_and(|path| path.exists());
874    let mut plan = ClaudeMemImportPlan {
875        db_path: db_path.display().to_string(),
876        exists: db_path.exists(),
877        readable: false,
878        chroma_path: chroma_path.as_ref().map(|path| path.display().to_string()),
879        chroma_present,
880        observations: empty_table_plan("observations"),
881        session_summaries: empty_table_plan("session_summaries"),
882        user_prompts: empty_table_plan("user_prompts"),
883        pending_messages: empty_table_plan("pending_messages"),
884        warnings: Vec::new(),
885        next_commands: vec![
886            "tsift memory import-claude-mem . --all --apply --json".to_string(),
887            "tsift graph-db --path . --json refresh".to_string(),
888        ],
889    };
890
891    if !plan.exists {
892        plan.warnings
893            .push("claude-mem database not found; pass --db to inspect another path".to_string());
894        return Ok(plan);
895    }
896
897    let conn = Connection::open_with_flags(
898        db_path,
899        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
900    )
901    .with_context(|| format!("open claude-mem db {}", db_path.display()))?;
902    plan.readable = true;
903    plan.observations = inspect_table(&conn, "observations", true, None)?;
904    plan.session_summaries = inspect_table(&conn, "session_summaries", true, None)?;
905    plan.user_prompts = inspect_table(&conn, "user_prompts", true, None)?;
906    plan.pending_messages = inspect_table(
907        &conn,
908        "pending_messages",
909        false,
910        Some(PENDING_MESSAGES_EXCLUSION_REASON),
911    )?;
912    if plan.pending_messages.rows > 0 {
913        plan.warnings
914            .push(PENDING_MESSAGES_EXCLUSION_REASON.to_string());
915    }
916
917    if !plan.chroma_present {
918        plan.warnings
919            .push("chroma directory was not found next to the claude-mem SQLite DB".to_string());
920    }
921
922    Ok(plan)
923}
924
925fn inspect_table(
926    conn: &Connection,
927    table: &str,
928    supported: bool,
929    unsupported_reason: Option<&str>,
930) -> Result<ClaudeMemTablePlan> {
931    if !table_exists(conn, table)? {
932        return Ok(empty_table_plan(table));
933    }
934    let rows: i64 = conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| {
935        row.get(0)
936    })?;
937    let columns = table_columns(conn, table)?;
938    Ok(ClaudeMemTablePlan {
939        table: table.to_string(),
940        supported,
941        rows: rows as usize,
942        columns,
943        unsupported_reason: unsupported_reason.map(str::to_string),
944    })
945}
946
947fn table_exists(conn: &Connection, table: &str) -> Result<bool> {
948    let exists: Option<i64> = conn
949        .query_row(
950            "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name = ?1",
951            [table],
952            |row| row.get(0),
953        )
954        .optional()?;
955    Ok(exists.is_some())
956}
957
958fn table_columns(conn: &Connection, table: &str) -> Result<Vec<String>> {
959    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
960    let mut rows = stmt.query([])?;
961    let mut columns = Vec::new();
962    while let Some(row) = rows.next()? {
963        columns.push(row.get::<_, String>(1)?);
964    }
965    Ok(columns)
966}
967
968#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
969pub struct ClaudeMemImportReport {
970    pub contract_version: String,
971    pub source: String,
972    pub target: String,
973    pub dry_run: bool,
974    #[serde(skip_serializing_if = "Option::is_none")]
975    pub limit_per_table: Option<usize>,
976    pub import_all: bool,
977    pub imported_events: usize,
978    pub already_present_events: usize,
979    pub planned_events: usize,
980    pub event_ids: Vec<String>,
981    pub event_ids_total: usize,
982    pub event_ids_truncated: bool,
983    pub reconciliation: ClaudeMemImportReconciliation,
984    pub plan: ClaudeMemImportPlan,
985}
986
987#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
988pub struct ClaudeMemReadReport {
989    pub contract_version: String,
990    pub source: String,
991    #[serde(skip_serializing_if = "Option::is_none")]
992    pub limit_per_table: Option<usize>,
993    pub import_all: bool,
994    pub events: Vec<MemoryEvent>,
995    pub reconciliation: ClaudeMemImportReconciliation,
996    pub plan: ClaudeMemImportPlan,
997}
998
999pub fn read_claude_mem_events(
1000    source_db_path: &Path,
1001    limit_per_table: Option<usize>,
1002) -> Result<ClaudeMemReadReport> {
1003    let plan = inspect_claude_mem(source_db_path)?;
1004    if !plan.exists || !plan.readable {
1005        let reconciliation =
1006            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1007        return Ok(ClaudeMemReadReport {
1008            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1009            source: source_db_path.display().to_string(),
1010            limit_per_table,
1011            import_all: limit_per_table.is_none(),
1012            events: Vec::new(),
1013            reconciliation,
1014            plan,
1015        });
1016    }
1017
1018    let conn = Connection::open_with_flags(
1019        source_db_path,
1020        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1021    )?;
1022    let mut events = Vec::new();
1023    if plan.observations.supported {
1024        events.extend(read_claude_mem_observations(&conn, limit_per_table)?);
1025    }
1026    if plan.session_summaries.supported {
1027        events.extend(read_claude_mem_summaries(&conn, limit_per_table)?);
1028    }
1029    if plan.user_prompts.supported {
1030        events.extend(read_claude_mem_user_prompts(&conn, limit_per_table)?);
1031    }
1032    let reconciliation =
1033        reconcile_claude_mem_import(&plan, limit_per_table, &events, &BTreeMap::new());
1034
1035    Ok(ClaudeMemReadReport {
1036        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1037        source: source_db_path.display().to_string(),
1038        limit_per_table,
1039        import_all: limit_per_table.is_none(),
1040        events,
1041        reconciliation,
1042        plan,
1043    })
1044}
1045
1046pub fn import_claude_mem(
1047    source_db_path: &Path,
1048    target_memory_db_path: &Path,
1049    limit_per_table: Option<usize>,
1050    dry_run: bool,
1051) -> Result<ClaudeMemImportReport> {
1052    let read_report = read_claude_mem_events(source_db_path, limit_per_table)?;
1053    let plan = read_report.plan;
1054    if !plan.exists || !plan.readable {
1055        let reconciliation =
1056            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1057        return Ok(ClaudeMemImportReport {
1058            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1059            source: source_db_path.display().to_string(),
1060            target: target_memory_db_path.display().to_string(),
1061            dry_run,
1062            limit_per_table,
1063            import_all: limit_per_table.is_none(),
1064            imported_events: 0,
1065            already_present_events: 0,
1066            planned_events: 0,
1067            event_ids: Vec::new(),
1068            event_ids_total: 0,
1069            event_ids_truncated: false,
1070            reconciliation,
1071            plan,
1072        });
1073    }
1074
1075    let events = read_report.events;
1076    let planned_events = events.len();
1077    let mut event_ids = Vec::new();
1078    let mut event_ids_total = 0;
1079    let mut write_results = BTreeMap::new();
1080    if !dry_run {
1081        let mut store = MemoryStore::open_or_create(target_memory_db_path)?;
1082        let results = store.insert_events(&events)?;
1083        for (event, result) in events.iter().zip(results) {
1084            record_claude_mem_write(&mut write_results, event, result.inserted);
1085            event_ids_total += 1;
1086            if event_ids.len() < MAX_IMPORT_EVENT_IDS {
1087                event_ids.push(result.id);
1088            }
1089        }
1090    }
1091    let reconciliation =
1092        reconcile_claude_mem_import(&plan, limit_per_table, &events, &write_results);
1093
1094    Ok(ClaudeMemImportReport {
1095        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1096        source: source_db_path.display().to_string(),
1097        target: target_memory_db_path.display().to_string(),
1098        dry_run,
1099        limit_per_table,
1100        import_all: limit_per_table.is_none(),
1101        imported_events: reconciliation.total_imported_events,
1102        already_present_events: reconciliation.total_already_present_events,
1103        planned_events,
1104        event_ids,
1105        event_ids_total,
1106        event_ids_truncated: event_ids_total > MAX_IMPORT_EVENT_IDS,
1107        reconciliation,
1108        plan,
1109    })
1110}
1111
1112fn read_claude_mem_observations(
1113    conn: &Connection,
1114    limit: Option<usize>,
1115) -> Result<Vec<MemoryEvent>> {
1116    let sql = format!(
1117        r#"
1118        SELECT id, memory_session_id, project, type, title, subtitle, text, facts,
1119               narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
1120               content_hash
1121        FROM observations
1122        ORDER BY created_at_epoch ASC, id ASC{}
1123        "#,
1124        claude_mem_limit_clause(limit)
1125    );
1126    let mut stmt = conn.prepare(&sql)?;
1127    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1128        let id: i64 = row.get(0)?;
1129        let session_id: String = row.get(1)?;
1130        let project: String = row.get(2)?;
1131        let observation_type: String = row.get(3)?;
1132        let title: Option<String> = row.get(4)?;
1133        let subtitle: Option<String> = row.get(5)?;
1134        let text: Option<String> = row.get(6)?;
1135        let facts: Option<String> = row.get(7)?;
1136        let narrative: Option<String> = row.get(8)?;
1137        let concepts: Option<String> = row.get(9)?;
1138        let prompt_number: Option<i64> = row.get(10)?;
1139        let discovery_tokens: i64 = row.get(11)?;
1140        let created_at_epoch: i64 = row.get(12)?;
1141        let content_hash: Option<String> = row.get(13)?;
1142        let body = join_non_empty([title, subtitle, text, facts, narrative, concepts]);
1143        let mut event = MemoryEvent::new(
1144            MemoryEventKind::ImportedObservation,
1145            format!("claude-mem:observations:{id}"),
1146            body,
1147        )
1148        .with_session_id(session_id)
1149        .with_observed_at_unix(created_at_epoch)
1150        .with_import("claude-mem", format!("observations:{id}"))
1151        .with_metadata("project", project)
1152        .with_metadata("observation_type", observation_type)
1153        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1154        if let Some(prompt_number) = prompt_number {
1155            event = event.with_metadata("prompt_number", prompt_number.to_string());
1156        }
1157        if let Some(content_hash) = content_hash {
1158            event = event.with_metadata("content_hash", content_hash);
1159        }
1160        Ok(event)
1161    })?;
1162    collect_rows(rows)
1163}
1164
1165fn read_claude_mem_summaries(conn: &Connection, limit: Option<usize>) -> Result<Vec<MemoryEvent>> {
1166    let sql = format!(
1167        r#"
1168        SELECT id, memory_session_id, project, request, investigated, learned,
1169               completed, next_steps, notes, prompt_number, discovery_tokens,
1170               created_at_epoch
1171        FROM session_summaries
1172        ORDER BY created_at_epoch ASC, id ASC{}
1173        "#,
1174        claude_mem_limit_clause(limit)
1175    );
1176    let mut stmt = conn.prepare(&sql)?;
1177    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1178        let id: i64 = row.get(0)?;
1179        let session_id: String = row.get(1)?;
1180        let project: String = row.get(2)?;
1181        let request: Option<String> = row.get(3)?;
1182        let investigated: Option<String> = row.get(4)?;
1183        let learned: Option<String> = row.get(5)?;
1184        let completed: Option<String> = row.get(6)?;
1185        let next_steps: Option<String> = row.get(7)?;
1186        let notes: Option<String> = row.get(8)?;
1187        let prompt_number: Option<i64> = row.get(9)?;
1188        let discovery_tokens: i64 = row.get(10)?;
1189        let created_at_epoch: i64 = row.get(11)?;
1190        let body = join_non_empty([request, investigated, learned, completed, next_steps, notes]);
1191        let mut event = MemoryEvent::new(
1192            MemoryEventKind::ImportedSessionSummary,
1193            format!("claude-mem:session_summaries:{id}"),
1194            body,
1195        )
1196        .with_session_id(session_id)
1197        .with_observed_at_unix(created_at_epoch)
1198        .with_import("claude-mem", format!("session_summaries:{id}"))
1199        .with_metadata("project", project)
1200        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1201        if let Some(prompt_number) = prompt_number {
1202            event = event.with_metadata("prompt_number", prompt_number.to_string());
1203        }
1204        Ok(event)
1205    })?;
1206    collect_rows(rows)
1207}
1208
1209fn read_claude_mem_user_prompts(
1210    conn: &Connection,
1211    limit: Option<usize>,
1212) -> Result<Vec<MemoryEvent>> {
1213    let sql = format!(
1214        r#"
1215        SELECT id, content_session_id, prompt_number, prompt_text, created_at_epoch
1216        FROM user_prompts
1217        ORDER BY created_at_epoch ASC, id ASC{}
1218        "#,
1219        claude_mem_limit_clause(limit)
1220    );
1221    let mut stmt = conn.prepare(&sql)?;
1222    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1223        let id: i64 = row.get(0)?;
1224        let session_id: String = row.get(1)?;
1225        let prompt_number: i64 = row.get(2)?;
1226        let prompt_text: String = row.get(3)?;
1227        let created_at_epoch: i64 = row.get(4)?;
1228        Ok(MemoryEvent::new(
1229            MemoryEventKind::ImportedUserPrompt,
1230            format!("claude-mem:user_prompts:{id}"),
1231            prompt_text,
1232        )
1233        .with_session_id(session_id)
1234        .with_observed_at_unix(created_at_epoch)
1235        .with_import("claude-mem", format!("user_prompts:{id}"))
1236        .with_metadata("prompt_number", prompt_number.to_string()))
1237    })?;
1238    collect_rows(rows)
1239}
1240
/// SQL suffix to append to a claude-mem read query: a `LIMIT ?1` clause when a
/// limit is requested, otherwise the empty string. The limit value itself is
/// not embedded; the clause only holds the `?1` placeholder.
fn claude_mem_limit_clause(limit: Option<usize>) -> &'static str {
    match limit {
        Some(_) => "\n        LIMIT ?1",
        None => "",
    }
}
1248
1249fn record_claude_mem_write(
1250    write_results: &mut BTreeMap<String, ClaudeMemTableWriteCounts>,
1251    event: &MemoryEvent,
1252    inserted: bool,
1253) {
1254    let Some(table) = claude_mem_event_table(event) else {
1255        return;
1256    };
1257    let counts = write_results.entry(table.to_string()).or_default();
1258    if inserted {
1259        counts.imported_events += 1;
1260    } else {
1261        counts.already_present_events += 1;
1262    }
1263}
1264
/// Write tallies for a single claude-mem source table; both counters start at zero.
#[derive(Clone, Copy, Debug, Default)]
struct ClaudeMemTableWriteCounts {
    /// Events that were newly inserted into the target store.
    imported_events: usize,
    /// Events that were already present in the target store.
    already_present_events: usize,
}
1270
1271fn reconcile_claude_mem_import(
1272    plan: &ClaudeMemImportPlan,
1273    limit_per_table: Option<usize>,
1274    events: &[MemoryEvent],
1275    write_results: &BTreeMap<String, ClaudeMemTableWriteCounts>,
1276) -> ClaudeMemImportReconciliation {
1277    let event_counts = claude_mem_event_counts(events);
1278    let observations = reconcile_claude_mem_table(
1279        &plan.observations,
1280        limit_per_table,
1281        event_counts
1282            .get("observations")
1283            .copied()
1284            .unwrap_or_default(),
1285        write_results
1286            .get("observations")
1287            .copied()
1288            .unwrap_or_default(),
1289    );
1290    let session_summaries = reconcile_claude_mem_table(
1291        &plan.session_summaries,
1292        limit_per_table,
1293        event_counts
1294            .get("session_summaries")
1295            .copied()
1296            .unwrap_or_default(),
1297        write_results
1298            .get("session_summaries")
1299            .copied()
1300            .unwrap_or_default(),
1301    );
1302    let user_prompts = reconcile_claude_mem_table(
1303        &plan.user_prompts,
1304        limit_per_table,
1305        event_counts
1306            .get("user_prompts")
1307            .copied()
1308            .unwrap_or_default(),
1309        write_results
1310            .get("user_prompts")
1311            .copied()
1312            .unwrap_or_default(),
1313    );
1314    let total_source_rows =
1315        observations.source_rows + session_summaries.source_rows + user_prompts.source_rows;
1316    let total_read_events =
1317        observations.read_events + session_summaries.read_events + user_prompts.read_events;
1318    let total_planned_events = observations.planned_events
1319        + session_summaries.planned_events
1320        + user_prompts.planned_events;
1321    let total_imported_events = observations.imported_events
1322        + session_summaries.imported_events
1323        + user_prompts.imported_events;
1324    let total_already_present_events = observations.already_present_events
1325        + session_summaries.already_present_events
1326        + user_prompts.already_present_events;
1327    let total_skipped_source_rows = observations.skipped_source_rows
1328        + session_summaries.skipped_source_rows
1329        + user_prompts.skipped_source_rows;
1330    let complete = observations.complete && session_summaries.complete && user_prompts.complete;
1331    ClaudeMemImportReconciliation {
1332        observations,
1333        session_summaries,
1334        user_prompts,
1335        total_source_rows,
1336        total_read_events,
1337        total_planned_events,
1338        total_imported_events,
1339        total_already_present_events,
1340        total_skipped_source_rows,
1341        complete,
1342    }
1343}
1344
1345fn reconcile_claude_mem_table(
1346    table_plan: &ClaudeMemTablePlan,
1347    limit_per_table: Option<usize>,
1348    read_events: usize,
1349    write_counts: ClaudeMemTableWriteCounts,
1350) -> ClaudeMemTableReconciliation {
1351    let source_rows = table_plan.rows;
1352    let skipped_source_rows = source_rows.saturating_sub(read_events);
1353    ClaudeMemTableReconciliation {
1354        table: table_plan.table.clone(),
1355        source_rows,
1356        read_events,
1357        planned_events: read_events,
1358        imported_events: write_counts.imported_events,
1359        already_present_events: write_counts.already_present_events,
1360        skipped_source_rows,
1361        limit_per_table,
1362        complete: skipped_source_rows == 0,
1363    }
1364}
1365
1366fn claude_mem_event_counts(events: &[MemoryEvent]) -> BTreeMap<String, usize> {
1367    let mut counts = BTreeMap::new();
1368    for event in events {
1369        if let Some(table) = claude_mem_event_table(event) {
1370            *counts.entry(table.to_string()).or_insert(0) += 1;
1371        }
1372    }
1373    counts
1374}
1375
1376fn claude_mem_event_table(event: &MemoryEvent) -> Option<&str> {
1377    if event.imported_from.as_deref() != Some("claude-mem") {
1378        return None;
1379    }
1380    event
1381        .imported_id
1382        .as_deref()?
1383        .split_once(':')
1384        .map(|(table, _)| table)
1385}
1386
1387fn collect_rows<T>(rows: impl Iterator<Item = rusqlite::Result<T>>) -> Result<Vec<T>> {
1388    let mut values = Vec::new();
1389    for row in rows {
1390        values.push(row?);
1391    }
1392    Ok(values)
1393}
1394
/// Join the present, non-blank values with a blank line between them.
///
/// `None` entries are dropped, each remaining value is trimmed, and values
/// that are empty after trimming are dropped too. Returns an empty string
/// when nothing is left.
fn join_non_empty(values: impl IntoIterator<Item = Option<String>>) -> String {
    let mut joined = String::new();
    for value in values.into_iter().flatten() {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            continue;
        }
        // Only non-empty parts are ever appended, so a non-empty buffer means
        // at least one part precedes this one and a separator is needed.
        if !joined.is_empty() {
            joined.push_str("\n\n");
        }
        joined.push_str(trimmed);
    }
    joined
}
1404
1405/// Authored finding-graph node kinds (`#trt1`): human/agent-authored knowledge
1406/// anchored to code, distinct from passively-captured `MemoryEvent`s.
1407#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1408pub enum AuthoredNodeKind {
1409    Finding,
1410    Decision,
1411    Note,
1412}
1413
1414impl AuthoredNodeKind {
1415    pub fn as_str(self) -> &'static str {
1416        match self {
1417            Self::Finding => "finding",
1418            Self::Decision => "decision",
1419            Self::Note => "note",
1420        }
1421    }
1422
1423    pub fn parse(raw: &str) -> Result<Self> {
1424        match raw {
1425            "finding" => Ok(Self::Finding),
1426            "decision" => Ok(Self::Decision),
1427            "note" => Ok(Self::Note),
1428            other => {
1429                bail!("unsupported authored node kind `{other}` (expected finding|decision|note)")
1430            }
1431        }
1432    }
1433}
1434
1435/// Build a `GraphProjection` for an authored finding/decision/note node
1436/// (`#trt1`), anchored to a stable symbol handle (graph node id / tagpath — NOT
1437/// a line number) via an `annotates` edge. `confidence` is clamped to `0..=1`;
1438/// `observed_at_unix` is the freshness stamp. The node id is content-stable, so
1439/// re-authoring the same text on the same anchor dedupes instead of duplicating.
1440pub fn authored_node_projection(
1441    kind: AuthoredNodeKind,
1442    text: &str,
1443    anchor_handle: &str,
1444    confidence: f64,
1445    observed_at_unix: i64,
1446    session_id: Option<&str>,
1447) -> GraphProjection {
1448    let mut projection = GraphProjection::default();
1449    let confidence = confidence.clamp(0.0, 1.0);
1450    let node_id = format!(
1451        "{}:{}",
1452        kind.as_str(),
1453        blake3::hash(format!("{}|{}|{}", kind.as_str(), anchor_handle, text).as_bytes()).to_hex()
1454    );
1455    let label: String = text.chars().take(80).collect();
1456    let mut node = GraphNode::new(&node_id, kind.as_str(), label)
1457        .with_property("text", text)
1458        .with_property("anchor_handle", anchor_handle)
1459        .with_property("confidence", format!("{confidence:.3}"))
1460        .with_property("observed_at_unix", observed_at_unix.to_string())
1461        .with_provenance(GraphProvenance::new("tsift-findings", anchor_handle))
1462        .with_freshness(GraphFreshness {
1463            content_hash: None,
1464            observed_at_unix: Some(observed_at_unix),
1465        });
1466    if let Some(session_id) = session_id {
1467        node = node.with_property("session_id", session_id);
1468    }
1469    projection.nodes.push(node);
1470    projection.edges.push(
1471        GraphEdge::new(node_id, anchor_handle, "annotates")
1472            .with_property("authored_kind", kind.as_str())
1473            .with_provenance(GraphProvenance::new("tsift-findings", anchor_handle)),
1474    );
1475    projection
1476}
1477
1478#[cfg(test)]
1479mod tests {
1480    use super::*;
1481    use tempfile::TempDir;
1482
1483    #[test]
1484    fn handoff_plan_defers_oversized_events_before_model_call() {
1485        let small = MemoryEvent::new(MemoryEventKind::PromptTarget, "session.md", "short");
1486        let large = MemoryEvent::new(
1487            MemoryEventKind::ToolResultArtifact,
1488            "artifact:log",
1489            "x".repeat(10_000),
1490        );
1491        let plan = plan_capture_handoff(
1492            &[small, large],
1493            MemoryBudget {
1494                max_prompt_tokens: 1000,
1495                reserve_tokens: 100,
1496                max_event_tokens: 500,
1497            },
1498        );
1499        assert_eq!(plan.status, "split_required");
1500        assert_eq!(plan.included_events.len(), 1);
1501        assert_eq!(
1502            plan.deferred_events[0].reason,
1503            "event_exceeds_max_event_tokens"
1504        );
1505    }
1506
1507    #[test]
1508    fn authored_node_projection_anchors_and_dedupes() {
1509        let p = authored_node_projection(
1510            AuthoredNodeKind::Finding,
1511            "decay ranking ships in tsift-memory",
1512            "symbol:rank_memory_events",
1513            1.7, // out of range -> clamps to 1.0
1514            1_700_000_000,
1515            Some("sess-1"),
1516        );
1517        assert_eq!(p.nodes.len(), 1);
1518        assert_eq!(p.edges.len(), 1);
1519        let node = &p.nodes[0];
1520        assert_eq!(node.kind, "finding");
1521        assert_eq!(node.properties.get("confidence").unwrap(), "1.000");
1522        assert_eq!(
1523            node.properties.get("anchor_handle").unwrap(),
1524            "symbol:rank_memory_events"
1525        );
1526        assert_eq!(p.edges[0].kind, "annotates");
1527        assert_eq!(p.edges[0].to_id, "symbol:rank_memory_events");
1528        assert_eq!(node.freshness.as_ref().unwrap().observed_at_unix, Some(1_700_000_000));
1529
1530        // Same kind+anchor+text -> identical (deduping) node id.
1531        let again = authored_node_projection(
1532            AuthoredNodeKind::Finding,
1533            "decay ranking ships in tsift-memory",
1534            "symbol:rank_memory_events",
1535            0.5,
1536            1_700_000_999,
1537            None,
1538        );
1539        assert_eq!(again.nodes[0].id, node.id);
1540        assert!(AuthoredNodeKind::parse("note").is_ok());
1541        assert!(AuthoredNodeKind::parse("bogus").is_err());
1542    }
1543
1544    #[test]
1545    fn memory_store_initializes_schema_and_dedupes_imported_events() {
1546        let dir = TempDir::new().unwrap();
1547        let db = dir.path().join("memory.db");
1548        let store = MemoryStore::open_or_create(&db).unwrap();
1549        let event = MemoryEvent::new(
1550            MemoryEventKind::ImportedObservation,
1551            "claude-mem:observations:1",
1552            "fact",
1553        )
1554        .with_session_id("session-a")
1555        .with_import("claude-mem", "observations:1");
1556
1557        let first = store.insert_event(&event).unwrap();
1558        let second = store.insert_event(&event).unwrap();
1559        assert_eq!(first, second);
1560        assert_eq!(store.event_count().unwrap(), 1);
1561    }
1562
1563    #[test]
1564    fn claude_mem_pending_messages_are_reported_but_not_imported() {
1565        let dir = TempDir::new().unwrap();
1566        let db = dir.path().join("claude-mem.db");
1567        let conn = Connection::open(&db).unwrap();
1568        conn.execute_batch(
1569            r#"
1570            CREATE TABLE pending_messages (
1571              id INTEGER PRIMARY KEY,
1572              session_db_id INTEGER NOT NULL,
1573              content_session_id TEXT NOT NULL,
1574              message_type TEXT NOT NULL CHECK(message_type IN ('observation', 'summarize')),
1575              tool_name TEXT,
1576              tool_input TEXT,
1577              tool_response TEXT,
1578              cwd TEXT,
1579              last_user_message TEXT,
1580              last_assistant_message TEXT,
1581              prompt_number INTEGER,
1582              status TEXT NOT NULL DEFAULT 'pending',
1583              retry_count INTEGER NOT NULL DEFAULT 0,
1584              created_at_epoch INTEGER NOT NULL,
1585              started_processing_at_epoch INTEGER,
1586              completed_at_epoch INTEGER,
1587              failed_at_epoch INTEGER
1588            );
1589            INSERT INTO pending_messages (
1590              id, session_db_id, content_session_id, message_type, tool_name,
1591              tool_input, tool_response, cwd, last_user_message,
1592              last_assistant_message, prompt_number, status, retry_count,
1593              created_at_epoch
1594            ) VALUES (
1595              1, 10, 'session-a', 'observation', 'bash',
1596              '{"cmd":"cat large.log"}', 'raw tool response', '/repo',
1597              'run tests', 'done', 7, 'pending', 0, 1700000000
1598            );
1599            "#,
1600        )
1601        .unwrap();
1602
1603        let plan = inspect_claude_mem(&db).unwrap();
1604        assert_eq!(plan.pending_messages.rows, 1);
1605        assert!(!plan.pending_messages.supported);
1606        assert_eq!(
1607            plan.pending_messages.unsupported_reason.as_deref(),
1608            Some(PENDING_MESSAGES_EXCLUSION_REASON)
1609        );
1610        assert!(
1611            plan.warnings
1612                .iter()
1613                .any(|warning| warning.contains("pending_messages"))
1614        );
1615
1616        let read_report = read_claude_mem_events(&db, Some(100)).unwrap();
1617        assert!(read_report.events.is_empty());
1618    }
1619
1620    #[test]
1621    fn claude_mem_import_all_reconciles_supported_table_counts() {
1622        let dir = TempDir::new().unwrap();
1623        let source = dir.path().join("claude-mem.db");
1624        let target = dir.path().join("memory.db");
1625        let conn = Connection::open(&source).unwrap();
1626        create_supported_claude_mem_fixture(&conn);
1627
1628        let dry_run = import_claude_mem(&source, &target, None, true).unwrap();
1629        assert!(dry_run.import_all);
1630        assert!(dry_run.reconciliation.complete);
1631        assert_eq!(dry_run.reconciliation.total_source_rows, 6);
1632        assert_eq!(dry_run.reconciliation.total_read_events, 6);
1633        assert_eq!(dry_run.reconciliation.total_skipped_source_rows, 0);
1634        assert_eq!(dry_run.reconciliation.observations.source_rows, 2);
1635        assert_eq!(dry_run.reconciliation.session_summaries.source_rows, 1);
1636        assert_eq!(dry_run.reconciliation.user_prompts.source_rows, 3);
1637
1638        let applied = import_claude_mem(&source, &target, None, false).unwrap();
1639        assert_eq!(applied.planned_events, 6);
1640        assert_eq!(applied.imported_events, 6);
1641        assert_eq!(applied.already_present_events, 0);
1642        assert_eq!(applied.event_ids_total, 6);
1643        assert_eq!(applied.event_ids.len(), 6);
1644        assert!(!applied.event_ids_truncated);
1645        assert_eq!(
1646            MemoryStore::open_or_create(&target)
1647                .unwrap()
1648                .event_count()
1649                .unwrap(),
1650            6
1651        );
1652
1653        let second_apply = import_claude_mem(&source, &target, None, false).unwrap();
1654        assert_eq!(second_apply.imported_events, 0);
1655        assert_eq!(second_apply.already_present_events, 6);
1656        assert_eq!(second_apply.reconciliation.total_already_present_events, 6);
1657    }
1658
/// Reading the supported fixture (six source rows in total) with a limit of
/// `Some(1)` must produce an incomplete, non-"import all" report: three events
/// read and three source rows skipped (1 observation, 0 session summaries,
/// 2 user prompts).
// NOTE(review): the per-table skip counts suggest the limit applies to each
// source table separately — confirm against `read_claude_mem_events`.
#[test]
fn claude_mem_limited_import_reports_skipped_source_rows() {
    let scratch = TempDir::new().unwrap();
    let source_db = scratch.path().join("claude-mem.db");
    let source_conn = Connection::open(&source_db).unwrap();
    create_supported_claude_mem_fixture(&source_conn);

    let report = read_claude_mem_events(&source_db, Some(1)).unwrap();
    let reconciliation = &report.reconciliation;

    // A limited read is neither an "import all" run nor a complete one.
    assert!(!report.import_all);
    assert!(!reconciliation.complete);

    // Totals across all source tables.
    assert_eq!(reconciliation.total_source_rows, 6);
    assert_eq!(reconciliation.total_read_events, 3);
    assert_eq!(reconciliation.total_skipped_source_rows, 3);

    // Skipped rows broken down by source table.
    assert_eq!(reconciliation.observations.skipped_source_rows, 1);
    assert_eq!(reconciliation.session_summaries.skipped_source_rows, 0);
    assert_eq!(reconciliation.user_prompts.skipped_source_rows, 2);
}
1688
/// Imports one more prompt than `MAX_IMPORT_EVENT_IDS` and checks that the
/// report still counts every event (`planned_events`, `event_ids_total`) while
/// the listed `event_ids` stop at the cap and `event_ids_truncated` is set.
/// The reconciliation is still expected to be complete.
#[test]
fn claude_mem_import_caps_reported_event_ids() {
    let scratch = TempDir::new().unwrap();
    let source_db = scratch.path().join("claude-mem.db");
    let target_db = scratch.path().join("memory.db");
    let source_conn = Connection::open(&source_db).unwrap();

    // Only the `user_prompts` table exists in this source database.
    source_conn
        .execute_batch(
            r#"
            CREATE TABLE user_prompts (
              id INTEGER PRIMARY KEY,
              content_session_id TEXT NOT NULL,
              prompt_number INTEGER NOT NULL,
              prompt_text TEXT NOT NULL,
              created_at_epoch INTEGER NOT NULL
            );
            "#,
        )
        .unwrap();

    // One row past the cap so the id list has to be truncated.
    let total_prompts = MAX_IMPORT_EVENT_IDS + 1;
    for id in 1..=total_prompts {
        let row_id = id as i64;
        let prompt_text = format!("prompt {id}");
        let created_at = 1700000000_i64 + row_id;
        source_conn
            .execute(
                "INSERT INTO user_prompts (id, content_session_id, prompt_number, prompt_text, created_at_epoch) VALUES (?1, 'session-a', ?2, ?3, ?4)",
                params![row_id, row_id, prompt_text, created_at],
            )
            .unwrap();
    }

    let applied = import_claude_mem(&source_db, &target_db, None, false).unwrap();

    assert_eq!(applied.planned_events, total_prompts);
    assert_eq!(applied.event_ids_total, total_prompts);
    assert_eq!(applied.event_ids.len(), MAX_IMPORT_EVENT_IDS);
    assert!(applied.event_ids_truncated);
    assert!(applied.reconciliation.complete);
}
1722
1723    fn create_supported_claude_mem_fixture(conn: &Connection) {
1724        conn.execute_batch(
1725            r#"
1726            CREATE TABLE observations (
1727              id INTEGER PRIMARY KEY,
1728              memory_session_id TEXT NOT NULL,
1729              project TEXT NOT NULL,
1730              type TEXT NOT NULL,
1731              title TEXT,
1732              subtitle TEXT,
1733              text TEXT,
1734              facts TEXT,
1735              narrative TEXT,
1736              concepts TEXT,
1737              prompt_number INTEGER,
1738              discovery_tokens INTEGER NOT NULL,
1739              created_at_epoch INTEGER NOT NULL,
1740              content_hash TEXT
1741            );
1742            CREATE TABLE session_summaries (
1743              id INTEGER PRIMARY KEY,
1744              memory_session_id TEXT NOT NULL,
1745              project TEXT NOT NULL,
1746              request TEXT,
1747              investigated TEXT,
1748              learned TEXT,
1749              completed TEXT,
1750              next_steps TEXT,
1751              notes TEXT,
1752              prompt_number INTEGER,
1753              discovery_tokens INTEGER NOT NULL,
1754              created_at_epoch INTEGER NOT NULL
1755            );
1756            CREATE TABLE user_prompts (
1757              id INTEGER PRIMARY KEY,
1758              content_session_id TEXT NOT NULL,
1759              prompt_number INTEGER NOT NULL,
1760              prompt_text TEXT NOT NULL,
1761              created_at_epoch INTEGER NOT NULL
1762            );
1763            INSERT INTO observations (
1764              id, memory_session_id, project, type, title, subtitle, text, facts,
1765              narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
1766              content_hash
1767            ) VALUES
1768              (1, 'session-a', 'agent-loop', 'fact', 'Title A', NULL, 'Text A', NULL, NULL, 'tsift', 1, 42, 1700000001, 'hash-a'),
1769              (2, 'session-b', 'agent-loop', 'fact', 'Title B', NULL, 'Text B', NULL, NULL, 'memory', 2, 43, 1700000002, 'hash-b');
1770            INSERT INTO session_summaries (
1771              id, memory_session_id, project, request, investigated, learned,
1772              completed, next_steps, notes, prompt_number, discovery_tokens,
1773              created_at_epoch
1774            ) VALUES (
1775              1, 'session-a', 'agent-loop', 'replace claude-mem', 'tables',
1776              'all rows', 'imported', 'refresh graph', NULL, 3, 44, 1700000003
1777            );
1778            INSERT INTO user_prompts (
1779              id, content_session_id, prompt_number, prompt_text, created_at_epoch
1780            ) VALUES
1781              (1, 'session-a', 1, 'first prompt', 1700000004),
1782              (2, 'session-a', 2, 'second prompt', 1700000005),
1783              (3, 'session-b', 1, 'third prompt', 1700000006);
1784            "#,
1785        )
1786        .unwrap();
1787    }
1788
/// Stores the events produced by `agent_doc_closeout_events` and checks that
/// `read_memory_events` returns both a `PromptTarget` event (with the prompt
/// text and the document path as session id) and a `CloseoutProof` event whose
/// metadata carries the commit hash.
#[test]
fn read_memory_events_round_trips_closeout_events() {
    let scratch = TempDir::new().unwrap();
    let db_path = scratch.path().join("memory.db");
    let store = MemoryStore::open_or_create(&db_path).unwrap();

    let closeout_events = agent_doc_closeout_events(
        Path::new("tasks/software/tsift.md"),
        "do [#tsiftmemhooks]",
        "wired closeout capture",
        Some("abc123"),
        "ok",
    );
    for closeout_event in closeout_events {
        store.insert_event(&closeout_event).unwrap();
    }

    let stored = read_memory_events(&db_path, 10).unwrap();

    let prompt_target_found = stored
        .iter()
        .filter(|event| event.kind == MemoryEventKind::PromptTarget)
        .any(|event| {
            event.text == "do [#tsiftmemhooks]"
                && event.session_id.as_deref() == Some("tasks/software/tsift.md")
        });
    assert!(prompt_target_found);

    let expected_hash = "abc123".to_string();
    let closeout_proof_found = stored
        .iter()
        .filter(|event| event.kind == MemoryEventKind::CloseoutProof)
        .any(|event| event.metadata.get("commit_hash") == Some(&expected_hash));
    assert!(closeout_proof_found);
}
1815
/// A 5,000-character tool result checked against a budget with
/// `max_event_tokens: 400` must be rejected with status
/// `"blocked_split_required"`, offer a replacement, and come with a plan of
/// more than one chunk, none of which exceeds 400 estimated tokens.
#[test]
fn budget_guard_fails_closed_with_retryable_chunks() {
    let oversized_input =
        MemoryBudgetGuardInput::new("tool.log", "tool_result", "x".repeat(5_000));
    let tight_budget = MemoryBudget {
        max_prompt_tokens: 1000,
        reserve_tokens: 100,
        max_event_tokens: 400,
    };

    let report = guard_memory_handoff(oversized_input, tight_budget);

    assert!(!report.allowed);
    assert_eq!(report.status, "blocked_split_required");
    assert!(report.replacement.is_some());

    let chunk_plan = &report.retryable_chunk_plan;
    assert!(chunk_plan.len() > 1);
    // No chunk may exceed the per-event limit from the budget above.
    assert!(!chunk_plan.iter().any(|chunk| chunk.token_estimate > 400));
}
1838
/// A 5,000-character `transcript` input must yield a replacement that uses the
/// session-review / context-pack strategy, with a session review command
/// mentioning `session-review` and a context command mentioning
/// `context-pack`.
#[test]
fn budget_guard_replaces_transcripts_with_session_review_commands() {
    let transcript_input =
        MemoryBudgetGuardInput::new("session.jsonl", "transcript", "x".repeat(5_000));
    let tight_budget = MemoryBudget {
        max_prompt_tokens: 1000,
        reserve_tokens: 100,
        max_event_tokens: 400,
    };

    let report = guard_memory_handoff(transcript_input, tight_budget);
    // The guard is expected to always supply a replacement for this input.
    let replacement = report.replacement.unwrap();

    assert_eq!(
        replacement.strategy,
        "replace_raw_transcript_with_session_review_or_context_pack_handle"
    );

    let review_command = &replacement.session_review_command;
    assert!(review_command.contains("session-review"));

    let context_command = &replacement.context_command;
    assert!(context_command.contains("context-pack"));
}
1861}