Skip to main content

tsift_memory/
lib.rs

1use anyhow::{Context, Result, bail};
2use rusqlite::{Connection, OpenFlags, OptionalExtension, Row, params, params_from_iter};
3use serde::{Deserialize, Serialize};
4use std::collections::{BTreeMap, BTreeSet};
5use std::path::{Path, PathBuf};
6use tsift_core::{GraphEdge, GraphFreshness, GraphNode, GraphProjection, GraphProvenance};
7
/// Contract identifier stamped into `MemoryHandoffPlan::contract_version`.
pub const MEMORY_CONTRACT_VERSION: &str = "tsift-memory-v1";
/// Current schema version; its string form is the value recorded under
/// `MEMORY_FTS_BACKFILL_KEY` once the FTS index has been rebuilt.
pub const MEMORY_SCHEMA_VERSION: i64 = 2;
/// Default `MemoryBudget::max_prompt_tokens`.
pub const DEFAULT_MAX_PROMPT_TOKENS: usize = 4096;
/// Default `MemoryBudget::reserve_tokens`.
pub const DEFAULT_RESERVE_TOKENS: usize = 512;
/// Default `MemoryBudget::max_event_tokens`.
pub const DEFAULT_MAX_EVENT_TOKENS: usize = 1536;
/// Contract identifier stamped into `MemoryBudgetGuardReport::contract_version`.
pub const MEMORY_BUDGET_GUARD_CONTRACT_VERSION: &str = "tsift-memory-budget-guard-v1";

// NOTE(review): not referenced in this part of the file; presumably caps how
// many event ids an import reports — confirm at the use site.
const MAX_IMPORT_EVENT_IDS: usize = 100;
// NOTE(review): not referenced in this part of the file; presumably the default
// number of candidate events fetched per read — confirm at the use site.
pub const DEFAULT_MEMORY_CANDIDATE_LIMIT: usize = 256;
/// Key in the `memory_internal_state` table that records which schema version
/// the FTS index was last rebuilt for.
const MEMORY_FTS_BACKFILL_KEY: &str = "memory_events_fts_rebuilt_schema_version";
// NOTE(review): not referenced in this part of the file; presumably limits the
// number of terms in a generated FTS query — confirm at the use site.
const MAX_MEMORY_FTS_TERMS: usize = 16;
19
/// Full DDL for the memory database, executed as one batch by
/// `MemoryStore::open_or_create` on every open.
///
/// Every statement is written to be re-runnable (`IF NOT EXISTS` /
/// `INSERT OR IGNORE`), so applying it to an existing database is a no-op for
/// objects that already exist. `memory_events_fts` is an FTS5 table declared
/// with `content='memory_events'`; the three `memory_events_a*` triggers mirror
/// inserts, deletes and updates of `memory_events` into it.
const MEMORY_SCHEMA_SQL: &str = r#"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS memory_schema_versions (
  version INTEGER PRIMARY KEY,
  applied_at_unix INTEGER NOT NULL
);

INSERT OR IGNORE INTO memory_schema_versions(version, applied_at_unix)
VALUES (1, strftime('%s','now'));

INSERT OR IGNORE INTO memory_schema_versions(version, applied_at_unix)
VALUES (2, strftime('%s','now'));

CREATE TABLE IF NOT EXISTS memory_internal_state (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS memory_events (
  id TEXT PRIMARY KEY,
  kind TEXT NOT NULL,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  text TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  imported_from TEXT,
  imported_id TEXT,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE INDEX IF NOT EXISTS idx_memory_events_kind
ON memory_events(kind);

CREATE INDEX IF NOT EXISTS idx_memory_events_session
ON memory_events(session_id);

CREATE INDEX IF NOT EXISTS idx_memory_events_observed_created
ON memory_events(COALESCE(observed_at_unix, created_at_unix), created_at_unix, id);

CREATE INDEX IF NOT EXISTS idx_memory_events_created_at
ON memory_events(created_at_unix, id);

CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_events_import_source
ON memory_events(imported_from, imported_id)
WHERE imported_from IS NOT NULL AND imported_id IS NOT NULL;

CREATE VIRTUAL TABLE IF NOT EXISTS memory_events_fts
USING fts5(
text,
source_ref,
kind,
metadata_json,
content='memory_events',
content_rowid='rowid',
tokenize = 'unicode61'
);

CREATE TRIGGER IF NOT EXISTS memory_events_ai AFTER INSERT ON memory_events BEGIN
INSERT INTO memory_events_fts(rowid, text, source_ref, kind, metadata_json)
VALUES (new.rowid, new.text, new.source_ref, new.kind, new.metadata_json);
END;

CREATE TRIGGER IF NOT EXISTS memory_events_ad AFTER DELETE ON memory_events BEGIN
INSERT INTO memory_events_fts(memory_events_fts, rowid, text, source_ref, kind, metadata_json)
VALUES('delete', old.rowid, old.text, old.source_ref, old.kind, old.metadata_json);
END;

CREATE TRIGGER IF NOT EXISTS memory_events_au AFTER UPDATE ON memory_events BEGIN
INSERT INTO memory_events_fts(memory_events_fts, rowid, text, source_ref, kind, metadata_json)
VALUES('delete', old.rowid, old.text, old.source_ref, old.kind, old.metadata_json);
INSERT INTO memory_events_fts(rowid, text, source_ref, kind, metadata_json)
VALUES (new.rowid, new.text, new.source_ref, new.kind, new.metadata_json);
END;

CREATE TABLE IF NOT EXISTS memory_session_summaries (
  id TEXT PRIMARY KEY,
  session_id TEXT NOT NULL,
  summary TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  observed_at_unix INTEGER,
  token_estimate INTEGER NOT NULL,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_artifacts (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  source_ref TEXT NOT NULL,
  artifact_kind TEXT NOT NULL,
  path TEXT,
  handle TEXT,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  token_estimate INTEGER NOT NULL DEFAULT 0,
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_tool_spans (
  id TEXT PRIMARY KEY,
  session_id TEXT,
  tool_name TEXT NOT NULL,
  input_artifact_id TEXT,
  output_artifact_id TEXT,
  status TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  started_at_unix INTEGER,
  completed_at_unix INTEGER,
  FOREIGN KEY(input_artifact_id) REFERENCES memory_artifacts(id),
  FOREIGN KEY(output_artifact_id) REFERENCES memory_artifacts(id)
);

CREATE TABLE IF NOT EXISTS memory_embeddings (
  id TEXT PRIMARY KEY,
  owner_kind TEXT NOT NULL,
  owner_id TEXT NOT NULL,
  provider TEXT NOT NULL,
  model TEXT NOT NULL,
  vector_ref TEXT NOT NULL,
  dimensions INTEGER,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

CREATE TABLE IF NOT EXISTS memory_graph_links (
  id TEXT PRIMARY KEY,
  memory_event_id TEXT NOT NULL,
  graph_node_id TEXT NOT NULL,
  graph_edge_id TEXT,
  link_kind TEXT NOT NULL,
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  FOREIGN KEY(memory_event_id) REFERENCES memory_events(id)
);

CREATE TABLE IF NOT EXISTS memory_import_runs (
  id TEXT PRIMARY KEY,
  source TEXT NOT NULL,
  source_ref TEXT NOT NULL,
  status TEXT NOT NULL,
  imported_events INTEGER NOT NULL DEFAULT 0,
  warnings_json TEXT NOT NULL DEFAULT '[]',
  started_at_unix INTEGER NOT NULL DEFAULT (strftime('%s','now')),
  completed_at_unix INTEGER
);
"#;
167
168pub fn memory_schema_sql() -> &'static str {
169    MEMORY_SCHEMA_SQL
170}
171
/// Builds the default memory database location for a project:
/// `<project_root>/.tsift/memory.db`.
///
/// This only assembles the path; nothing is created or checked on disk.
pub fn default_memory_db_path(project_root: &Path) -> PathBuf {
    let mut db_path = project_root.to_path_buf();
    db_path.push(".tsift");
    db_path.push("memory.db");
    db_path
}
175
/// Builds `$HOME/.claude-mem/claude-mem.db` from the `HOME` environment
/// variable.
///
/// Returns `None` when `HOME` is not set. The path is not checked for
/// existence.
pub fn default_claude_mem_db_path() -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    let mut db_path = PathBuf::from(home);
    db_path.push(".claude-mem");
    db_path.push("claude-mem.db");
    Some(db_path)
}
181
/// Estimates the token count of `text` as one token per four UTF-8 bytes,
/// rounded up, after trimming leading and trailing whitespace.
///
/// Blank or whitespace-only input yields `0`.
pub fn estimate_tokens(text: &str) -> usize {
    // `0usize.div_ceil(4)` is already 0, so blank input needs no special case.
    text.trim().len().div_ceil(4)
}
186
187#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
188#[serde(rename_all = "snake_case")]
189pub enum MemoryReadOrder {
190    OldestFirst,
191    RecentFirst,
192    QueryRelevant,
193}
194
195impl MemoryReadOrder {
196    pub fn as_str(self) -> &'static str {
197        match self {
198            Self::OldestFirst => "oldest_first",
199            Self::RecentFirst => "recent_first",
200            Self::QueryRelevant => "query_relevant",
201        }
202    }
203}
204
205#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
206pub struct MemoryReadPolicy {
207    pub order: MemoryReadOrder,
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub query: Option<String>,
210}
211
212impl MemoryReadPolicy {
213    pub fn oldest_first() -> Self {
214        Self {
215            order: MemoryReadOrder::OldestFirst,
216            query: None,
217        }
218    }
219
220    pub fn recent_first() -> Self {
221        Self {
222            order: MemoryReadOrder::RecentFirst,
223            query: None,
224        }
225    }
226
227    pub fn query_relevant(query: impl Into<String>) -> Self {
228        Self {
229            order: MemoryReadOrder::QueryRelevant,
230            query: Some(query.into()),
231        }
232    }
233
234    pub fn validate(&self) -> Result<()> {
235        if self.order == MemoryReadOrder::QueryRelevant
236            && self
237                .query
238                .as_deref()
239                .is_none_or(|query| query.trim().is_empty())
240        {
241            bail!("query_relevant memory read policy requires a non-empty query");
242        }
243        Ok(())
244    }
245}
246
247impl Default for MemoryReadPolicy {
248    fn default() -> Self {
249        Self::recent_first()
250    }
251}
252
/// Serializable summary describing one read of the memory store.
///
/// NOTE(review): the code that fills this struct is outside the visible part
/// of the file, so the notes below are inferred from field names and types
/// only — confirm against the producer before relying on them.
/// - `events_read` / `events_available` presumably count the returned rows
///   versus the rows present in the source.
/// - `max_rowid`, the `*_observed_at_unix` and `max_created_at_unix` fields
///   presumably bound the data that was visible at read time.
/// - `source_watermark` / `content_hash` presumably fingerprint the source
///   state and the returned content respectively.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryReadWatermark {
    pub policy: MemoryReadPolicy,
    pub limit: usize,
    pub events_read: usize,
    pub events_available: usize,
    pub max_rowid: Option<i64>,
    pub min_observed_at_unix: Option<i64>,
    pub max_observed_at_unix: Option<i64>,
    pub max_created_at_unix: Option<i64>,
    pub first_event_id: Option<String>,
    pub last_event_id: Option<String>,
    pub source_watermark: String,
    pub content_hash: String,
}
268
/// Private aggregate figures about the event source.
///
/// NOTE(review): populated outside the visible part of the file; presumably
/// the row count and maxima of `memory_events` that feed
/// `MemoryReadWatermark` — confirm against the query that builds it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct MemorySourceStats {
    events_available: usize,
    max_rowid: Option<i64>,
    max_observed_at_unix: Option<i64>,
    max_created_at_unix: Option<i64>,
}
276
277fn memory_content_hash<T: Serialize>(value: &T) -> Result<String> {
278    let bytes = serde_json::to_vec(value)?;
279    Ok(blake3::hash(&bytes).to_hex().to_string())
280}
281
282#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
283#[serde(rename_all = "snake_case")]
284pub enum MemoryEventKind {
285    PromptTarget,
286    ToolCall,
287    ToolResultArtifact,
288    ResponseSummary,
289    CloseoutProof,
290    SessionCheck,
291    ImportedObservation,
292    ImportedSessionSummary,
293    ImportedUserPrompt,
294}
295
296impl MemoryEventKind {
297    pub fn as_str(self) -> &'static str {
298        match self {
299            Self::PromptTarget => "prompt_target",
300            Self::ToolCall => "tool_call",
301            Self::ToolResultArtifact => "tool_result_artifact",
302            Self::ResponseSummary => "response_summary",
303            Self::CloseoutProof => "closeout_proof",
304            Self::SessionCheck => "session_check",
305            Self::ImportedObservation => "imported_observation",
306            Self::ImportedSessionSummary => "imported_session_summary",
307            Self::ImportedUserPrompt => "imported_user_prompt",
308        }
309    }
310
311    pub fn parse(raw: &str) -> Result<Self> {
312        match raw {
313            "prompt_target" => Ok(Self::PromptTarget),
314            "tool_call" => Ok(Self::ToolCall),
315            "tool_result_artifact" => Ok(Self::ToolResultArtifact),
316            "response_summary" => Ok(Self::ResponseSummary),
317            "closeout_proof" => Ok(Self::CloseoutProof),
318            "session_check" => Ok(Self::SessionCheck),
319            "imported_observation" => Ok(Self::ImportedObservation),
320            "imported_session_summary" => Ok(Self::ImportedSessionSummary),
321            "imported_user_prompt" => Ok(Self::ImportedUserPrompt),
322            other => bail!("unsupported memory event kind `{other}`"),
323        }
324    }
325}
326
327#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
328pub struct MemoryEvent {
329    pub kind: MemoryEventKind,
330    #[serde(skip_serializing_if = "Option::is_none")]
331    pub session_id: Option<String>,
332    pub source_ref: String,
333    pub text: String,
334    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
335    pub metadata: BTreeMap<String, String>,
336    #[serde(skip_serializing_if = "Option::is_none")]
337    pub observed_at_unix: Option<i64>,
338    pub token_estimate: usize,
339    #[serde(skip_serializing_if = "Option::is_none")]
340    pub imported_from: Option<String>,
341    #[serde(skip_serializing_if = "Option::is_none")]
342    pub imported_id: Option<String>,
343}
344
345impl MemoryEvent {
346    pub fn new(
347        kind: MemoryEventKind,
348        source_ref: impl Into<String>,
349        text: impl Into<String>,
350    ) -> Self {
351        let text = text.into();
352        Self {
353            kind,
354            session_id: None,
355            source_ref: source_ref.into(),
356            token_estimate: estimate_tokens(&text),
357            text,
358            metadata: BTreeMap::new(),
359            observed_at_unix: None,
360            imported_from: None,
361            imported_id: None,
362        }
363    }
364
365    pub fn with_session_id(mut self, session_id: impl Into<String>) -> Self {
366        self.session_id = Some(session_id.into());
367        self
368    }
369
370    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
371        self.metadata.insert(key.into(), value.into());
372        self
373    }
374
375    pub fn with_observed_at_unix(mut self, observed_at_unix: i64) -> Self {
376        self.observed_at_unix = Some(observed_at_unix);
377        self
378    }
379
380    pub fn with_import(mut self, source: impl Into<String>, id: impl Into<String>) -> Self {
381        self.imported_from = Some(source.into());
382        self.imported_id = Some(id.into());
383        self
384    }
385
386    pub fn stable_id(&self) -> String {
387        let raw = serde_json::json!([
388            self.kind.as_str(),
389            self.session_id,
390            self.source_ref,
391            self.text,
392            self.observed_at_unix,
393            self.imported_from,
394            self.imported_id
395        ])
396        .to_string();
397        format!("memevt:{}", blake3::hash(raw.as_bytes()).to_hex())
398    }
399}
400
401#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
402pub struct MemoryBudget {
403    pub max_prompt_tokens: usize,
404    pub reserve_tokens: usize,
405    pub max_event_tokens: usize,
406}
407
408impl Default for MemoryBudget {
409    fn default() -> Self {
410        Self {
411            max_prompt_tokens: DEFAULT_MAX_PROMPT_TOKENS,
412            reserve_tokens: DEFAULT_RESERVE_TOKENS,
413            max_event_tokens: DEFAULT_MAX_EVENT_TOKENS,
414        }
415    }
416}
417
418impl MemoryBudget {
419    pub fn new(max_prompt_tokens: usize) -> Self {
420        Self {
421            max_prompt_tokens,
422            ..Self::default()
423        }
424    }
425
426    pub fn available_tokens(self) -> usize {
427        self.max_prompt_tokens.saturating_sub(self.reserve_tokens)
428    }
429}
430
/// Reference to an event that `plan_capture_handoff` admitted into the budget.
///
/// Carries identifying data only; the event text is not included.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BudgetedMemoryEvent {
    /// The event's `MemoryEvent::stable_id`.
    pub id: String,
    /// The event kind as returned by `MemoryEventKind::as_str`.
    pub kind: String,
    pub source_ref: String,
    pub token_estimate: usize,
}
438
/// Reference to an event that `plan_capture_handoff` left out of the handoff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DeferredMemoryEvent {
    /// The event's `MemoryEvent::stable_id`.
    pub id: String,
    /// The event kind as returned by `MemoryEventKind::as_str`.
    pub kind: String,
    pub source_ref: String,
    pub token_estimate: usize,
    /// `"event_exceeds_max_event_tokens"` or `"handoff_budget_exhausted"`.
    pub reason: String,
    /// For oversized events, `token_estimate` divided by the per-event limit,
    /// rounded up; `1` for events deferred because the budget ran out.
    pub recommended_chunks: usize,
}
448
/// Result of `plan_capture_handoff`: which events fit the budget and which
/// were deferred.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryHandoffPlan {
    /// Always `MEMORY_CONTRACT_VERSION`.
    pub contract_version: String,
    /// `"ready"` when nothing was deferred, otherwise `"split_required"`.
    pub status: String,
    pub max_prompt_tokens: usize,
    pub reserve_tokens: usize,
    /// `max_prompt_tokens` minus `reserve_tokens`, floored at zero.
    pub available_tokens: usize,
    /// Sum of `token_estimate` over `included_events`.
    pub estimated_included_tokens: usize,
    pub included_events: Vec<BudgetedMemoryEvent>,
    pub deferred_events: Vec<DeferredMemoryEvent>,
    /// Suggested follow-up command lines; these are templates containing
    /// placeholders such as `<n>`.
    pub next_commands: Vec<String>,
}
461
462pub fn plan_capture_handoff(events: &[MemoryEvent], budget: MemoryBudget) -> MemoryHandoffPlan {
463    let mut used = 0usize;
464    let mut included_events = Vec::new();
465    let mut deferred_events = Vec::new();
466    let available = budget.available_tokens();
467
468    for event in events {
469        let id = event.stable_id();
470        if event.token_estimate > budget.max_event_tokens {
471            deferred_events.push(DeferredMemoryEvent {
472                id,
473                kind: event.kind.as_str().to_string(),
474                source_ref: event.source_ref.clone(),
475                token_estimate: event.token_estimate,
476                reason: "event_exceeds_max_event_tokens".to_string(),
477                recommended_chunks: event
478                    .token_estimate
479                    .div_ceil(budget.max_event_tokens.max(1)),
480            });
481            continue;
482        }
483
484        if used + event.token_estimate <= available {
485            used += event.token_estimate;
486            included_events.push(BudgetedMemoryEvent {
487                id,
488                kind: event.kind.as_str().to_string(),
489                source_ref: event.source_ref.clone(),
490                token_estimate: event.token_estimate,
491            });
492        } else {
493            deferred_events.push(DeferredMemoryEvent {
494                id,
495                kind: event.kind.as_str().to_string(),
496                source_ref: event.source_ref.clone(),
497                token_estimate: event.token_estimate,
498                reason: "handoff_budget_exhausted".to_string(),
499                recommended_chunks: 1,
500            });
501        }
502    }
503
504    MemoryHandoffPlan {
505        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
506        status: if deferred_events.is_empty() {
507            "ready".to_string()
508        } else {
509            "split_required".to_string()
510        },
511        max_prompt_tokens: budget.max_prompt_tokens,
512        reserve_tokens: budget.reserve_tokens,
513        available_tokens: available,
514        estimated_included_tokens: used,
515        included_events,
516        deferred_events,
517        next_commands: vec![
518            "tsift memory handoff-plan '<event text>' --budget-tokens <n> --json".to_string(),
519            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
520        ],
521    }
522}
523
524#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
525pub struct MemoryBudgetGuardInput {
526    pub source_ref: String,
527    pub payload_kind: String,
528    pub text: String,
529}
530
531impl MemoryBudgetGuardInput {
532    pub fn new(
533        source_ref: impl Into<String>,
534        payload_kind: impl Into<String>,
535        text: impl Into<String>,
536    ) -> Self {
537        Self {
538            source_ref: source_ref.into(),
539            payload_kind: payload_kind.into(),
540            text: text.into(),
541        }
542    }
543}
544
/// Suggested substitute for a payload that `guard_memory_handoff` blocked.
///
/// Built by `replacement_for_budget_guard`; the command fields are ready-made
/// `tsift` command lines with the source reference already shell-quoted.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryBudgetReplacement {
    /// Name of the replacement strategy; differs for transcript-like payloads
    /// and for other payloads.
    pub strategy: String,
    /// `artifact:` followed by the BLAKE3 hex digest of the source reference.
    pub artifact_ref: String,
    /// A `session-review` command for transcript-like payloads, otherwise a
    /// `log-digest` command.
    pub digest_command: String,
    pub context_command: String,
    pub session_review_command: String,
}
553
/// One slice of an oversized payload, as planned by
/// `retry_chunks_for_budget_guard`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryRetryChunk {
    /// 1-based position of the chunk in the plan.
    pub index: usize,
    /// The input's source reference with `#chunk-<index>` appended.
    pub source_ref: String,
    /// Start of the chunk as a byte offset into the payload text (inclusive).
    pub byte_start: usize,
    /// End of the chunk as a byte offset (exclusive); always on a UTF-8
    /// character boundary.
    pub byte_end: usize,
    /// `estimate_tokens` of the chunk text.
    pub token_estimate: usize,
    /// Command-line template for re-running the guard on this chunk.
    pub retry_command: String,
}
563
/// Verdict produced by `guard_memory_handoff` for a single payload.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryBudgetGuardReport {
    /// Always `MEMORY_BUDGET_GUARD_CONTRACT_VERSION`.
    pub contract_version: String,
    /// `"ready"` when `allowed`, otherwise `"blocked_split_required"`.
    pub status: String,
    /// True when the estimate fits both `available_tokens` and
    /// `max_chunk_tokens`.
    pub allowed: bool,
    pub source_ref: String,
    pub payload_kind: String,
    /// Byte length of the payload text as given (not trimmed).
    pub byte_count: usize,
    /// `estimate_tokens` of the payload text.
    pub estimated_tokens: usize,
    pub max_prompt_tokens: usize,
    pub reserve_tokens: usize,
    pub available_tokens: usize,
    /// Copied from `MemoryBudget::max_event_tokens`.
    pub max_chunk_tokens: usize,
    /// Present only when the payload was blocked; omitted from serialized
    /// output otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub replacement: Option<MemoryBudgetReplacement>,
    /// Empty when the payload was allowed.
    pub retryable_chunk_plan: Vec<MemoryRetryChunk>,
    /// One entry per exceeded limit.
    pub warnings: Vec<String>,
    pub next_commands: Vec<String>,
}
583
584pub fn guard_memory_handoff(
585    input: MemoryBudgetGuardInput,
586    budget: MemoryBudget,
587) -> MemoryBudgetGuardReport {
588    let estimated_tokens = estimate_tokens(&input.text);
589    let available_tokens = budget.available_tokens();
590    let allowed =
591        estimated_tokens <= available_tokens && estimated_tokens <= budget.max_event_tokens;
592    let replacement = (!allowed).then(|| replacement_for_budget_guard(&input));
593    let retryable_chunk_plan = if allowed {
594        Vec::new()
595    } else {
596        retry_chunks_for_budget_guard(&input, budget.max_event_tokens.min(available_tokens).max(1))
597    };
598    let mut warnings = Vec::new();
599    if estimated_tokens > available_tokens {
600        warnings.push("payload_exceeds_available_prompt_budget".to_string());
601    }
602    if estimated_tokens > budget.max_event_tokens {
603        warnings.push("payload_exceeds_max_chunk_tokens".to_string());
604    }
605
606    MemoryBudgetGuardReport {
607        contract_version: MEMORY_BUDGET_GUARD_CONTRACT_VERSION.to_string(),
608        status: if allowed {
609            "ready".to_string()
610        } else {
611            "blocked_split_required".to_string()
612        },
613        allowed,
614        source_ref: input.source_ref.clone(),
615        payload_kind: input.payload_kind.clone(),
616        byte_count: input.text.len(),
617        estimated_tokens,
618        max_prompt_tokens: budget.max_prompt_tokens,
619        reserve_tokens: budget.reserve_tokens,
620        available_tokens,
621        max_chunk_tokens: budget.max_event_tokens,
622        replacement,
623        retryable_chunk_plan,
624        warnings,
625        next_commands: vec![
626            format!(
627                "tsift memory budget-guard --file {} --json",
628                shell_quote(&input.source_ref)
629            ),
630            "tsift --envelope context-pack <session.md> --budget normal".to_string(),
631            "tsift --envelope session-review <session.md> --next-context --budget normal"
632                .to_string(),
633        ],
634    }
635}
636
637fn replacement_for_budget_guard(input: &MemoryBudgetGuardInput) -> MemoryBudgetReplacement {
638    let quoted_ref = shell_quote(&input.source_ref);
639    let is_transcript = matches!(
640        input.payload_kind.as_str(),
641        "transcript" | "session" | "session_transcript" | "agent_doc"
642    ) || input.source_ref.ends_with(".jsonl")
643        || input.source_ref.ends_with(".md");
644    let strategy = if is_transcript {
645        "replace_raw_transcript_with_session_review_or_context_pack_handle"
646    } else {
647        "replace_raw_tool_or_log_payload_with_digest_artifact_handle"
648    };
649    MemoryBudgetReplacement {
650        strategy: strategy.to_string(),
651        artifact_ref: format!(
652            "artifact:{}",
653            blake3::hash(input.source_ref.as_bytes()).to_hex()
654        ),
655        digest_command: if is_transcript {
656            format!("tsift --envelope session-review {quoted_ref} --next-context --budget normal")
657        } else {
658            format!("tsift log-digest --path . --input {quoted_ref} --json")
659        },
660        context_command: format!("tsift --envelope context-pack {quoted_ref} --budget normal"),
661        session_review_command: format!(
662            "tsift --envelope session-review {quoted_ref} --next-context --budget normal"
663        ),
664    }
665}
666
667fn retry_chunks_for_budget_guard(
668    input: &MemoryBudgetGuardInput,
669    max_chunk_tokens: usize,
670) -> Vec<MemoryRetryChunk> {
671    let byte_budget = max_chunk_tokens.saturating_mul(4).max(1);
672    let mut chunks = Vec::new();
673    let mut start = 0usize;
674    while start < input.text.len() {
675        let mut end = (start + byte_budget).min(input.text.len());
676        while end > start && !input.text.is_char_boundary(end) {
677            end -= 1;
678        }
679        if end == start {
680            if let Some((offset, ch)) = input.text[start..].char_indices().next() {
681                end = start + offset + ch.len_utf8();
682            } else {
683                break;
684            }
685        }
686        let token_estimate = estimate_tokens(&input.text[start..end]);
687        let index = chunks.len() + 1;
688        chunks.push(MemoryRetryChunk {
689            index,
690            source_ref: format!("{}#chunk-{index}", input.source_ref),
691            byte_start: start,
692            byte_end: end,
693            token_estimate,
694            retry_command: retry_chunk_command(input, index, start, end),
695        });
696        start = end;
697    }
698    chunks
699}
700
701fn retry_chunk_command(
702    input: &MemoryBudgetGuardInput,
703    index: usize,
704    byte_start: usize,
705    byte_end: usize,
706) -> String {
707    let chunk_ref = format!("{}#chunk-{index}", input.source_ref);
708    if input.source_ref == "inline" {
709        format!(
710            "tsift memory budget-guard --text '<chunk {index} payload>' --source-ref {} --budget-tokens <n> --json",
711            shell_quote(&chunk_ref)
712        )
713    } else {
714        format!(
715            "tsift memory budget-guard --file {} --byte-start {byte_start} --byte-end {byte_end} --source-ref {} --budget-tokens <n> --json",
716            shell_quote(&input.source_ref),
717            shell_quote(&chunk_ref)
718        )
719    }
720}
721
/// Quotes `s` for use as a single argument in a POSIX shell command line.
///
/// Non-empty strings made only of alphanumerics and `_`, `-`, `.`, `/` are
/// returned unchanged. Anything else is wrapped in single quotes, with each
/// embedded single quote written as `'\''`.
///
/// The empty string is returned as `''`: left bare it would disappear from
/// the command line and shift every following argument.
fn shell_quote(s: &str) -> String {
    let is_plain = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '/');
    if !s.is_empty() && s.chars().all(is_plain) {
        s.to_string()
    } else {
        format!("'{}'", s.replace('\'', "'\\''"))
    }
}
731
/// Description of one capture hook, as listed by `agent_doc_hook_contract`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryHookSpec {
    /// Hook name.
    pub name: String,
    /// Name of the `MemoryEventKind` the hook emits (its `as_str` value).
    pub event_kind: String,
    /// Names of the fields the hook is expected to supply.
    pub required_fields: Vec<String>,
    /// Prose description of how the hook keeps its payload within budget.
    pub budget_behavior: String,
}
739
740pub fn agent_doc_hook_contract() -> Vec<MemoryHookSpec> {
741    vec![
742        MemoryHookSpec {
743            name: "prompt_target".to_string(),
744            event_kind: MemoryEventKind::PromptTarget.as_str().to_string(),
745            required_fields: vec!["session_path".to_string(), "prompt_target".to_string()],
746            budget_behavior:
747                "capture text plus graph/source handles; never inline raw transcript blobs"
748                    .to_string(),
749        },
750        MemoryHookSpec {
751            name: "tool_result_artifact".to_string(),
752            event_kind: MemoryEventKind::ToolResultArtifact.as_str().to_string(),
753            required_fields: vec!["tool_name".to_string(), "artifact_handle".to_string()],
754            budget_behavior: "store artifact handle and digest, not full raw output".to_string(),
755        },
756        MemoryHookSpec {
757            name: "response_summary".to_string(),
758            event_kind: MemoryEventKind::ResponseSummary.as_str().to_string(),
759            required_fields: vec!["response_topic".to_string(), "summary".to_string()],
760            budget_behavior: "summaries are capped before write and linked to source handles"
761                .to_string(),
762        },
763        MemoryHookSpec {
764            name: "closeout_proof".to_string(),
765            event_kind: MemoryEventKind::CloseoutProof.as_str().to_string(),
766            required_fields: vec!["commit_hash".to_string(), "changed_paths".to_string()],
767            budget_behavior: "proof records are structured metadata with compact prose".to_string(),
768        },
769        MemoryHookSpec {
770            name: "session_check".to_string(),
771            event_kind: MemoryEventKind::SessionCheck.as_str().to_string(),
772            required_fields: vec!["status".to_string(), "session_path".to_string()],
773            budget_behavior: "persist pass/fail state and recovery commands".to_string(),
774        },
775    ]
776}
777
778pub fn agent_doc_closeout_events(
779    session_path: &Path,
780    prompt_target: &str,
781    response_summary: &str,
782    commit_hash: Option<&str>,
783    session_check_status: &str,
784) -> Vec<MemoryEvent> {
785    let session_id = session_path.display().to_string();
786    let mut events = vec![
787        MemoryEvent::new(
788            MemoryEventKind::PromptTarget,
789            session_path.display().to_string(),
790            prompt_target,
791        )
792        .with_session_id(session_id.clone()),
793        MemoryEvent::new(
794            MemoryEventKind::ResponseSummary,
795            session_path.display().to_string(),
796            response_summary,
797        )
798        .with_session_id(session_id.clone()),
799        MemoryEvent::new(
800            MemoryEventKind::SessionCheck,
801            session_path.display().to_string(),
802            session_check_status,
803        )
804        .with_session_id(session_id.clone())
805        .with_metadata("status", session_check_status),
806    ];
807
808    if let Some(commit_hash) = commit_hash {
809        events.push(
810            MemoryEvent::new(
811                MemoryEventKind::CloseoutProof,
812                session_path.display().to_string(),
813                commit_hash,
814            )
815            .with_session_id(session_id)
816            .with_metadata("commit_hash", commit_hash),
817        );
818    }
819
820    events
821}
822
/// Outcome of attempting to store one event.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryInsertResult {
    /// The event's `MemoryEvent::stable_id`.
    pub id: String,
    /// True when the `INSERT OR IGNORE` added a row; false when the insert was
    /// ignored because of an existing row.
    pub inserted: bool,
}
828
829pub struct MemoryStore {
830    conn: Connection,
831}
832
833impl MemoryStore {
834    pub fn open_or_create(path: &Path) -> Result<Self> {
835        if let Some(parent) = path.parent() {
836            std::fs::create_dir_all(parent)
837                .with_context(|| format!("create {}", parent.display()))?;
838        }
839        let conn = Connection::open(path).with_context(|| format!("open {}", path.display()))?;
840        conn.execute_batch(MEMORY_SCHEMA_SQL)
841            .with_context(|| format!("initialize {}", path.display()))?;
842        ensure_memory_fts_backfill(&conn)
843            .with_context(|| format!("backfill memory FTS index for {}", path.display()))?;
844        Ok(Self { conn })
845    }
846
847    pub fn insert_event(&self, event: &MemoryEvent) -> Result<String> {
848        Ok(self.insert_event_result(event)?.id)
849    }
850
851    pub fn insert_event_result(&self, event: &MemoryEvent) -> Result<MemoryInsertResult> {
852        insert_event_on(&self.conn, event)
853    }
854
855    pub fn insert_events(&mut self, events: &[MemoryEvent]) -> Result<Vec<MemoryInsertResult>> {
856        let tx = self.conn.transaction()?;
857        let mut results = Vec::with_capacity(events.len());
858        for event in events {
859            results.push(insert_event_on(&tx, event)?);
860        }
861        tx.commit()?;
862        Ok(results)
863    }
864
865    pub fn event_count(&self) -> Result<usize> {
866        let count: i64 = self
867            .conn
868            .query_row("SELECT COUNT(*) FROM memory_events", [], |row| row.get(0))?;
869        Ok(count as usize)
870    }
871}
872
873fn insert_event_on(conn: &Connection, event: &MemoryEvent) -> Result<MemoryInsertResult> {
874    let id = event.stable_id();
875    let metadata_json = serde_json::to_string(&event.metadata)?;
876    let changed = conn.execute(
877        r#"
878            INSERT OR IGNORE INTO memory_events(
879              id, kind, session_id, source_ref, text, metadata_json,
880              observed_at_unix, token_estimate, imported_from, imported_id
881            )
882            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
883            "#,
884        params![
885            &id,
886            event.kind.as_str(),
887            event.session_id.as_deref(),
888            &event.source_ref,
889            &event.text,
890            &metadata_json,
891            event.observed_at_unix,
892            event.token_estimate as i64,
893            event.imported_from.as_deref(),
894            event.imported_id.as_deref()
895        ],
896    )?;
897    Ok(MemoryInsertResult {
898        id,
899        inserted: changed > 0,
900    })
901}
902
903fn ensure_memory_fts_backfill(conn: &Connection) -> Result<()> {
904    let target_version = MEMORY_SCHEMA_VERSION.to_string();
905    let backfilled_version: Option<String> = conn
906        .query_row(
907            "SELECT value FROM memory_internal_state WHERE key = ?1",
908            [MEMORY_FTS_BACKFILL_KEY],
909            |row| row.get(0),
910        )
911        .optional()?;
912    if backfilled_version.as_deref() == Some(target_version.as_str()) {
913        return Ok(());
914    }
915
916    conn.execute(
917        "INSERT INTO memory_events_fts(memory_events_fts) VALUES('rebuild')",
918        [],
919    )?;
920    conn.execute(
921        "INSERT OR REPLACE INTO memory_internal_state(key, value) VALUES (?1, ?2)",
922        params![MEMORY_FTS_BACKFILL_KEY, target_version],
923    )?;
924    Ok(())
925}
926
927fn memory_event_from_row(row: &Row<'_>, offset: usize) -> Result<MemoryEvent> {
928    let kind_raw: String = row.get(offset)?;
929    let session_id: Option<String> = row.get(offset + 1)?;
930    let source_ref: String = row.get(offset + 2)?;
931    let text: String = row.get(offset + 3)?;
932    let metadata_json: String = row.get(offset + 4)?;
933    let observed_at_unix: Option<i64> = row.get(offset + 5)?;
934    let token_estimate: i64 = row.get(offset + 6)?;
935    let imported_from: Option<String> = row.get(offset + 7)?;
936    let imported_id: Option<String> = row.get(offset + 8)?;
937    let metadata = serde_json::from_str::<BTreeMap<String, String>>(&metadata_json)
938        .with_context(|| format!("parse memory metadata for {source_ref}"))?;
939    let mut event = MemoryEvent::new(MemoryEventKind::parse(&kind_raw)?, source_ref, text);
940    event.session_id = session_id;
941    event.metadata = metadata;
942    event.observed_at_unix = observed_at_unix;
943    event.token_estimate = token_estimate.max(0) as usize;
944    event.imported_from = imported_from;
945    event.imported_id = imported_id;
946    Ok(event)
947}
948
949fn memory_fts_query(query: &str) -> Option<String> {
950    let mut seen = BTreeSet::new();
951    let terms: Vec<String> = query
952        .split(|c: char| !c.is_alphanumeric())
953        .filter(|term| term.len() > 1)
954        .map(|term| term.to_lowercase())
955        .filter(|term| seen.insert(term.clone()))
956        .take(MAX_MEMORY_FTS_TERMS)
957        .map(|term| format!("\"{}\"", term.replace('"', "\"\"")))
958        .collect();
959    if terms.is_empty() {
960        None
961    } else {
962        Some(terms.join(" OR "))
963    }
964}
965
966fn memory_events_fts_available(conn: &Connection) -> Result<bool> {
967    let exists: Option<i64> = conn
968        .query_row(
969            r#"
970            SELECT 1
971            FROM sqlite_master
972            WHERE type = 'table' AND name = 'memory_events_fts'
973            "#,
974            [],
975            |row| row.get(0),
976        )
977        .optional()?;
978    Ok(exists.is_some())
979}
980
981fn push_memory_candidate(
982    events: &mut Vec<MemoryEvent>,
983    seen_rowids: &mut BTreeSet<i64>,
984    rowid: i64,
985    event: MemoryEvent,
986    limit: usize,
987) {
988    if events.len() < limit && seen_rowids.insert(rowid) {
989        events.push(event);
990    }
991}
992
993fn read_memory_events_ordered(
994    memory_db_path: &Path,
995    limit: usize,
996    recent_first: bool,
997) -> Result<Vec<MemoryEvent>> {
998    if limit == 0 || !memory_db_path.exists() {
999        return Ok(Vec::new());
1000    }
1001    let conn = Connection::open_with_flags(
1002        memory_db_path,
1003        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1004    )
1005    .with_context(|| format!("open memory db {}", memory_db_path.display()))?;
1006    let sql = if recent_first {
1007        r#"
1008        SELECT kind, session_id, source_ref, text, metadata_json,
1009               observed_at_unix, token_estimate, imported_from, imported_id
1010        FROM memory_events
1011        ORDER BY COALESCE(observed_at_unix, created_at_unix) DESC,
1012                 created_at_unix DESC,
1013                 id DESC
1014        LIMIT ?1
1015        "#
1016    } else {
1017        r#"
1018        SELECT kind, session_id, source_ref, text, metadata_json,
1019               observed_at_unix, token_estimate, imported_from, imported_id
1020        FROM memory_events
1021        ORDER BY COALESCE(observed_at_unix, created_at_unix), id
1022        LIMIT ?1
1023        "#
1024    };
1025    let mut stmt = conn.prepare(sql)?;
1026    let mut rows = stmt.query([limit as i64])?;
1027    let mut events = Vec::new();
1028    while let Some(row) = rows.next()? {
1029        events.push(memory_event_from_row(row, 0)?);
1030    }
1031    Ok(events)
1032}
1033
1034pub fn read_memory_events(memory_db_path: &Path, limit: usize) -> Result<Vec<MemoryEvent>> {
1035    read_memory_events_ordered(memory_db_path, limit, false)
1036}
1037
1038pub fn read_memory_events_with_policy(
1039    memory_db_path: &Path,
1040    policy: &MemoryReadPolicy,
1041    limit: usize,
1042) -> Result<Vec<MemoryEvent>> {
1043    policy.validate()?;
1044    match policy.order {
1045        MemoryReadOrder::OldestFirst => read_memory_events_ordered(memory_db_path, limit, false),
1046        MemoryReadOrder::RecentFirst => read_memory_events_ordered(memory_db_path, limit, true),
1047        MemoryReadOrder::QueryRelevant => read_memory_event_candidates(
1048            memory_db_path,
1049            policy.query.as_deref().unwrap_or_default(),
1050            limit,
1051        ),
1052    }
1053}
1054
1055fn memory_source_stats(memory_db_path: &Path) -> Result<MemorySourceStats> {
1056    if !memory_db_path.exists() {
1057        return Ok(MemorySourceStats {
1058            events_available: 0,
1059            max_rowid: None,
1060            max_observed_at_unix: None,
1061            max_created_at_unix: None,
1062        });
1063    }
1064    let conn = Connection::open_with_flags(
1065        memory_db_path,
1066        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1067    )
1068    .with_context(|| format!("open memory db {}", memory_db_path.display()))?;
1069    conn.query_row(
1070        r#"
1071        SELECT COUNT(*), MAX(rowid), MAX(observed_at_unix), MAX(created_at_unix)
1072        FROM memory_events
1073        "#,
1074        [],
1075        |row| {
1076            let count: i64 = row.get(0)?;
1077            Ok(MemorySourceStats {
1078                events_available: count.max(0) as usize,
1079                max_rowid: row.get(1)?,
1080                max_observed_at_unix: row.get(2)?,
1081                max_created_at_unix: row.get(3)?,
1082            })
1083        },
1084    )
1085    .map_err(Into::into)
1086}
1087
1088pub fn memory_read_watermark(
1089    memory_db_path: &Path,
1090    policy: &MemoryReadPolicy,
1091    limit: usize,
1092    events: &[MemoryEvent],
1093) -> Result<MemoryReadWatermark> {
1094    policy.validate()?;
1095    let stats = memory_source_stats(memory_db_path)?;
1096    let event_ids: Vec<String> = events.iter().map(MemoryEvent::stable_id).collect();
1097    let content_hash = memory_content_hash(&events)?;
1098    let min_observed_at_unix = events
1099        .iter()
1100        .filter_map(|event| event.observed_at_unix)
1101        .min();
1102    let max_observed_at_unix = events
1103        .iter()
1104        .filter_map(|event| event.observed_at_unix)
1105        .max();
1106    let source_watermark = memory_content_hash(&serde_json::json!({
1107        "schema_version": MEMORY_SCHEMA_VERSION,
1108        "policy": policy,
1109        "limit": limit,
1110        "events_available": stats.events_available,
1111        "max_rowid": stats.max_rowid,
1112        "max_observed_at_unix": stats.max_observed_at_unix,
1113        "max_created_at_unix": stats.max_created_at_unix,
1114        "selected_event_ids": event_ids,
1115        "selected_content_hash": content_hash,
1116    }))?;
1117    Ok(MemoryReadWatermark {
1118        policy: policy.clone(),
1119        limit,
1120        events_read: events.len(),
1121        events_available: stats.events_available,
1122        max_rowid: stats.max_rowid,
1123        min_observed_at_unix,
1124        max_observed_at_unix,
1125        max_created_at_unix: stats.max_created_at_unix,
1126        first_event_id: event_ids.first().cloned(),
1127        last_event_id: event_ids.last().cloned(),
1128        source_watermark,
1129        content_hash,
1130    })
1131}
1132
1133pub fn read_memory_event_candidates(
1134    memory_db_path: &Path,
1135    query: &str,
1136    limit: usize,
1137) -> Result<Vec<MemoryEvent>> {
1138    if limit == 0 || !memory_db_path.exists() {
1139        return Ok(Vec::new());
1140    }
1141    let conn = Connection::open_with_flags(
1142        memory_db_path,
1143        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1144    )
1145    .with_context(|| format!("open memory db {}", memory_db_path.display()))?;
1146
1147    let mut events = Vec::new();
1148    let mut seen_rowids = BTreeSet::new();
1149    let fts_query = memory_fts_query(query);
1150    let recent_reserve = if fts_query.is_some() && limit >= 8 {
1151        (limit / 4).max(1)
1152    } else {
1153        0
1154    };
1155    let fts_limit = limit.saturating_sub(recent_reserve);
1156    if let Some(fts_query) = fts_query
1157        && fts_limit > 0
1158        && memory_events_fts_available(&conn)?
1159    {
1160        let mut stmt = conn.prepare(
1161            r#"
1162            SELECT e.rowid, e.kind, e.session_id, e.source_ref, e.text, e.metadata_json,
1163                   e.observed_at_unix, e.token_estimate, e.imported_from, e.imported_id
1164            FROM memory_events_fts
1165            JOIN memory_events e ON e.rowid = memory_events_fts.rowid
1166            WHERE memory_events_fts MATCH ?1
1167            ORDER BY bm25(memory_events_fts),
1168                     COALESCE(e.observed_at_unix, e.created_at_unix) DESC,
1169                     e.id DESC
1170            LIMIT ?2
1171            "#,
1172        )?;
1173        let mut rows = stmt.query(params![fts_query, fts_limit as i64])?;
1174        while let Some(row) = rows.next()? {
1175            let rowid: i64 = row.get(0)?;
1176            let event = memory_event_from_row(row, 1)?;
1177            push_memory_candidate(&mut events, &mut seen_rowids, rowid, event, limit);
1178        }
1179    }
1180
1181    let remaining = limit.saturating_sub(events.len());
1182    if remaining > 0 {
1183        let recent_limit = if seen_rowids.is_empty() {
1184            remaining
1185        } else {
1186            limit
1187        };
1188        let mut stmt = conn.prepare(
1189            r#"
1190            SELECT rowid, kind, session_id, source_ref, text, metadata_json,
1191                   observed_at_unix, token_estimate, imported_from, imported_id
1192            FROM memory_events
1193            ORDER BY COALESCE(observed_at_unix, created_at_unix) DESC,
1194                     created_at_unix DESC,
1195                     id DESC
1196            LIMIT ?1
1197            "#,
1198        )?;
1199        let mut rows = stmt.query([recent_limit as i64])?;
1200        while let Some(row) = rows.next()? {
1201            let rowid: i64 = row.get(0)?;
1202            let event = memory_event_from_row(row, 1)?;
1203            push_memory_candidate(&mut events, &mut seen_rowids, rowid, event, limit);
1204        }
1205    }
1206
1207    Ok(events)
1208}
1209
1210#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1211pub struct ClaudeMemTablePlan {
1212    pub table: String,
1213    pub supported: bool,
1214    pub rows: usize,
1215    pub columns: Vec<String>,
1216    #[serde(skip_serializing_if = "Option::is_none")]
1217    pub unsupported_reason: Option<String>,
1218}
1219
1220#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1221pub struct ClaudeMemImportPlan {
1222    pub db_path: String,
1223    pub exists: bool,
1224    pub readable: bool,
1225    #[serde(skip_serializing_if = "Option::is_none")]
1226    pub chroma_path: Option<String>,
1227    pub chroma_present: bool,
1228    pub observations: ClaudeMemTablePlan,
1229    pub session_summaries: ClaudeMemTablePlan,
1230    pub user_prompts: ClaudeMemTablePlan,
1231    pub pending_messages: ClaudeMemTablePlan,
1232    pub warnings: Vec<String>,
1233    pub next_commands: Vec<String>,
1234}
1235
1236#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1237pub struct ClaudeMemTableReconciliation {
1238    pub table: String,
1239    pub source_rows: usize,
1240    pub read_events: usize,
1241    pub planned_events: usize,
1242    pub imported_events: usize,
1243    pub already_present_events: usize,
1244    pub skipped_source_rows: usize,
1245    #[serde(skip_serializing_if = "Option::is_none")]
1246    pub limit_per_table: Option<usize>,
1247    pub complete: bool,
1248}
1249
1250#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1251pub struct ClaudeMemImportReconciliation {
1252    pub observations: ClaudeMemTableReconciliation,
1253    pub session_summaries: ClaudeMemTableReconciliation,
1254    pub user_prompts: ClaudeMemTableReconciliation,
1255    pub total_source_rows: usize,
1256    pub total_read_events: usize,
1257    pub total_planned_events: usize,
1258    pub total_imported_events: usize,
1259    pub total_already_present_events: usize,
1260    pub total_skipped_source_rows: usize,
1261    pub complete: bool,
1262}
1263
1264fn empty_table_plan(table: &str) -> ClaudeMemTablePlan {
1265    ClaudeMemTablePlan {
1266        table: table.to_string(),
1267        supported: false,
1268        rows: 0,
1269        columns: Vec::new(),
1270        unsupported_reason: None,
1271    }
1272}
1273
/// Explanation reported for the `pending_messages` table, which the
/// claude-mem import never reads.
const PENDING_MESSAGES_EXCLUSION_REASON: &str = "pending_messages is intentionally excluded \
    from claude-mem import because it contains transient queue state and raw tool payload \
    fields, not durable replacement memory";
1275
1276pub fn inspect_claude_mem(db_path: &Path) -> Result<ClaudeMemImportPlan> {
1277    let chroma_path = db_path.parent().map(|parent| parent.join("chroma"));
1278    let chroma_present = chroma_path.as_ref().is_some_and(|path| path.exists());
1279    let mut plan = ClaudeMemImportPlan {
1280        db_path: db_path.display().to_string(),
1281        exists: db_path.exists(),
1282        readable: false,
1283        chroma_path: chroma_path.as_ref().map(|path| path.display().to_string()),
1284        chroma_present,
1285        observations: empty_table_plan("observations"),
1286        session_summaries: empty_table_plan("session_summaries"),
1287        user_prompts: empty_table_plan("user_prompts"),
1288        pending_messages: empty_table_plan("pending_messages"),
1289        warnings: Vec::new(),
1290        next_commands: vec![
1291            "tsift memory import-claude-mem . --all --apply --json".to_string(),
1292            "tsift graph-db --path . --json refresh".to_string(),
1293        ],
1294    };
1295
1296    if !plan.exists {
1297        plan.warnings
1298            .push("claude-mem database not found; pass --db to inspect another path".to_string());
1299        return Ok(plan);
1300    }
1301
1302    let conn = Connection::open_with_flags(
1303        db_path,
1304        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1305    )
1306    .with_context(|| format!("open claude-mem db {}", db_path.display()))?;
1307    plan.readable = true;
1308    plan.observations = inspect_table(&conn, "observations", true, None)?;
1309    plan.session_summaries = inspect_table(&conn, "session_summaries", true, None)?;
1310    plan.user_prompts = inspect_table(&conn, "user_prompts", true, None)?;
1311    plan.pending_messages = inspect_table(
1312        &conn,
1313        "pending_messages",
1314        false,
1315        Some(PENDING_MESSAGES_EXCLUSION_REASON),
1316    )?;
1317    if plan.pending_messages.rows > 0 {
1318        plan.warnings
1319            .push(PENDING_MESSAGES_EXCLUSION_REASON.to_string());
1320    }
1321
1322    if !plan.chroma_present {
1323        plan.warnings
1324            .push("chroma directory was not found next to the claude-mem SQLite DB".to_string());
1325    }
1326
1327    Ok(plan)
1328}
1329
1330fn inspect_table(
1331    conn: &Connection,
1332    table: &str,
1333    supported: bool,
1334    unsupported_reason: Option<&str>,
1335) -> Result<ClaudeMemTablePlan> {
1336    if !table_exists(conn, table)? {
1337        return Ok(empty_table_plan(table));
1338    }
1339    let rows: i64 = conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| {
1340        row.get(0)
1341    })?;
1342    let columns = table_columns(conn, table)?;
1343    Ok(ClaudeMemTablePlan {
1344        table: table.to_string(),
1345        supported,
1346        rows: rows as usize,
1347        columns,
1348        unsupported_reason: unsupported_reason.map(str::to_string),
1349    })
1350}
1351
1352fn table_exists(conn: &Connection, table: &str) -> Result<bool> {
1353    let exists: Option<i64> = conn
1354        .query_row(
1355            "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name = ?1",
1356            [table],
1357            |row| row.get(0),
1358        )
1359        .optional()?;
1360    Ok(exists.is_some())
1361}
1362
1363fn table_columns(conn: &Connection, table: &str) -> Result<Vec<String>> {
1364    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
1365    let mut rows = stmt.query([])?;
1366    let mut columns = Vec::new();
1367    while let Some(row) = rows.next()? {
1368        columns.push(row.get::<_, String>(1)?);
1369    }
1370    Ok(columns)
1371}
1372
1373#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1374pub struct ClaudeMemImportReport {
1375    pub contract_version: String,
1376    pub source: String,
1377    pub target: String,
1378    pub dry_run: bool,
1379    #[serde(skip_serializing_if = "Option::is_none")]
1380    pub limit_per_table: Option<usize>,
1381    pub import_all: bool,
1382    pub imported_events: usize,
1383    pub already_present_events: usize,
1384    pub planned_events: usize,
1385    pub event_ids: Vec<String>,
1386    pub event_ids_total: usize,
1387    pub event_ids_truncated: bool,
1388    pub reconciliation: ClaudeMemImportReconciliation,
1389    pub plan: ClaudeMemImportPlan,
1390}
1391
1392#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1393pub struct ClaudeMemReadReport {
1394    pub contract_version: String,
1395    pub source: String,
1396    #[serde(skip_serializing_if = "Option::is_none")]
1397    pub limit_per_table: Option<usize>,
1398    pub import_all: bool,
1399    pub events: Vec<MemoryEvent>,
1400    pub reconciliation: ClaudeMemImportReconciliation,
1401    pub plan: ClaudeMemImportPlan,
1402}
1403
1404pub fn read_claude_mem_events(
1405    source_db_path: &Path,
1406    limit_per_table: Option<usize>,
1407) -> Result<ClaudeMemReadReport> {
1408    let plan = inspect_claude_mem(source_db_path)?;
1409    if !plan.exists || !plan.readable {
1410        let reconciliation =
1411            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1412        return Ok(ClaudeMemReadReport {
1413            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1414            source: source_db_path.display().to_string(),
1415            limit_per_table,
1416            import_all: limit_per_table.is_none(),
1417            events: Vec::new(),
1418            reconciliation,
1419            plan,
1420        });
1421    }
1422
1423    let conn = Connection::open_with_flags(
1424        source_db_path,
1425        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI,
1426    )?;
1427    let mut events = Vec::new();
1428    if plan.observations.supported {
1429        events.extend(read_claude_mem_observations(&conn, limit_per_table)?);
1430    }
1431    if plan.session_summaries.supported {
1432        events.extend(read_claude_mem_summaries(&conn, limit_per_table)?);
1433    }
1434    if plan.user_prompts.supported {
1435        events.extend(read_claude_mem_user_prompts(&conn, limit_per_table)?);
1436    }
1437    let reconciliation =
1438        reconcile_claude_mem_import(&plan, limit_per_table, &events, &BTreeMap::new());
1439
1440    Ok(ClaudeMemReadReport {
1441        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1442        source: source_db_path.display().to_string(),
1443        limit_per_table,
1444        import_all: limit_per_table.is_none(),
1445        events,
1446        reconciliation,
1447        plan,
1448    })
1449}
1450
1451pub fn import_claude_mem(
1452    source_db_path: &Path,
1453    target_memory_db_path: &Path,
1454    limit_per_table: Option<usize>,
1455    dry_run: bool,
1456) -> Result<ClaudeMemImportReport> {
1457    let read_report = read_claude_mem_events(source_db_path, limit_per_table)?;
1458    let plan = read_report.plan;
1459    if !plan.exists || !plan.readable {
1460        let reconciliation =
1461            reconcile_claude_mem_import(&plan, limit_per_table, &[], &BTreeMap::new());
1462        return Ok(ClaudeMemImportReport {
1463            contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1464            source: source_db_path.display().to_string(),
1465            target: target_memory_db_path.display().to_string(),
1466            dry_run,
1467            limit_per_table,
1468            import_all: limit_per_table.is_none(),
1469            imported_events: 0,
1470            already_present_events: 0,
1471            planned_events: 0,
1472            event_ids: Vec::new(),
1473            event_ids_total: 0,
1474            event_ids_truncated: false,
1475            reconciliation,
1476            plan,
1477        });
1478    }
1479
1480    let events = read_report.events;
1481    let planned_events = events.len();
1482    let mut event_ids = Vec::new();
1483    let mut event_ids_total = 0;
1484    let mut write_results = BTreeMap::new();
1485    if !dry_run {
1486        let mut store = MemoryStore::open_or_create(target_memory_db_path)?;
1487        let results = store.insert_events(&events)?;
1488        for (event, result) in events.iter().zip(results) {
1489            record_claude_mem_write(&mut write_results, event, result.inserted);
1490            event_ids_total += 1;
1491            if event_ids.len() < MAX_IMPORT_EVENT_IDS {
1492                event_ids.push(result.id);
1493            }
1494        }
1495    }
1496    let reconciliation =
1497        reconcile_claude_mem_import(&plan, limit_per_table, &events, &write_results);
1498
1499    Ok(ClaudeMemImportReport {
1500        contract_version: MEMORY_CONTRACT_VERSION.to_string(),
1501        source: source_db_path.display().to_string(),
1502        target: target_memory_db_path.display().to_string(),
1503        dry_run,
1504        limit_per_table,
1505        import_all: limit_per_table.is_none(),
1506        imported_events: reconciliation.total_imported_events,
1507        already_present_events: reconciliation.total_already_present_events,
1508        planned_events,
1509        event_ids,
1510        event_ids_total,
1511        event_ids_truncated: event_ids_total > MAX_IMPORT_EVENT_IDS,
1512        reconciliation,
1513        plan,
1514    })
1515}
1516
1517fn read_claude_mem_observations(
1518    conn: &Connection,
1519    limit: Option<usize>,
1520) -> Result<Vec<MemoryEvent>> {
1521    let sql = format!(
1522        r#"
1523        SELECT id, memory_session_id, project, type, title, subtitle, text, facts,
1524               narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
1525               content_hash
1526        FROM observations
1527        ORDER BY created_at_epoch ASC, id ASC{}
1528        "#,
1529        claude_mem_limit_clause(limit)
1530    );
1531    let mut stmt = conn.prepare(&sql)?;
1532    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1533        let id: i64 = row.get(0)?;
1534        let session_id: String = row.get(1)?;
1535        let project: String = row.get(2)?;
1536        let observation_type: String = row.get(3)?;
1537        let title: Option<String> = row.get(4)?;
1538        let subtitle: Option<String> = row.get(5)?;
1539        let text: Option<String> = row.get(6)?;
1540        let facts: Option<String> = row.get(7)?;
1541        let narrative: Option<String> = row.get(8)?;
1542        let concepts: Option<String> = row.get(9)?;
1543        let prompt_number: Option<i64> = row.get(10)?;
1544        let discovery_tokens: i64 = row.get(11)?;
1545        let created_at_epoch: i64 = row.get(12)?;
1546        let content_hash: Option<String> = row.get(13)?;
1547        let body = join_non_empty([title, subtitle, text, facts, narrative, concepts]);
1548        let mut event = MemoryEvent::new(
1549            MemoryEventKind::ImportedObservation,
1550            format!("claude-mem:observations:{id}"),
1551            body,
1552        )
1553        .with_session_id(session_id)
1554        .with_observed_at_unix(created_at_epoch)
1555        .with_import("claude-mem", format!("observations:{id}"))
1556        .with_metadata("project", project)
1557        .with_metadata("observation_type", observation_type)
1558        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1559        if let Some(prompt_number) = prompt_number {
1560            event = event.with_metadata("prompt_number", prompt_number.to_string());
1561        }
1562        if let Some(content_hash) = content_hash {
1563            event = event.with_metadata("content_hash", content_hash);
1564        }
1565        Ok(event)
1566    })?;
1567    collect_rows(rows)
1568}
1569
1570fn read_claude_mem_summaries(conn: &Connection, limit: Option<usize>) -> Result<Vec<MemoryEvent>> {
1571    let sql = format!(
1572        r#"
1573        SELECT id, memory_session_id, project, request, investigated, learned,
1574               completed, next_steps, notes, prompt_number, discovery_tokens,
1575               created_at_epoch
1576        FROM session_summaries
1577        ORDER BY created_at_epoch ASC, id ASC{}
1578        "#,
1579        claude_mem_limit_clause(limit)
1580    );
1581    let mut stmt = conn.prepare(&sql)?;
1582    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1583        let id: i64 = row.get(0)?;
1584        let session_id: String = row.get(1)?;
1585        let project: String = row.get(2)?;
1586        let request: Option<String> = row.get(3)?;
1587        let investigated: Option<String> = row.get(4)?;
1588        let learned: Option<String> = row.get(5)?;
1589        let completed: Option<String> = row.get(6)?;
1590        let next_steps: Option<String> = row.get(7)?;
1591        let notes: Option<String> = row.get(8)?;
1592        let prompt_number: Option<i64> = row.get(9)?;
1593        let discovery_tokens: i64 = row.get(10)?;
1594        let created_at_epoch: i64 = row.get(11)?;
1595        let body = join_non_empty([request, investigated, learned, completed, next_steps, notes]);
1596        let mut event = MemoryEvent::new(
1597            MemoryEventKind::ImportedSessionSummary,
1598            format!("claude-mem:session_summaries:{id}"),
1599            body,
1600        )
1601        .with_session_id(session_id)
1602        .with_observed_at_unix(created_at_epoch)
1603        .with_import("claude-mem", format!("session_summaries:{id}"))
1604        .with_metadata("project", project)
1605        .with_metadata("discovery_tokens", discovery_tokens.to_string());
1606        if let Some(prompt_number) = prompt_number {
1607            event = event.with_metadata("prompt_number", prompt_number.to_string());
1608        }
1609        Ok(event)
1610    })?;
1611    collect_rows(rows)
1612}
1613
1614fn read_claude_mem_user_prompts(
1615    conn: &Connection,
1616    limit: Option<usize>,
1617) -> Result<Vec<MemoryEvent>> {
1618    let sql = format!(
1619        r#"
1620        SELECT id, content_session_id, prompt_number, prompt_text, created_at_epoch
1621        FROM user_prompts
1622        ORDER BY created_at_epoch ASC, id ASC{}
1623        "#,
1624        claude_mem_limit_clause(limit)
1625    );
1626    let mut stmt = conn.prepare(&sql)?;
1627    let rows = stmt.query_map(params_from_iter(limit.map(|value| value as i64)), |row| {
1628        let id: i64 = row.get(0)?;
1629        let session_id: String = row.get(1)?;
1630        let prompt_number: i64 = row.get(2)?;
1631        let prompt_text: String = row.get(3)?;
1632        let created_at_epoch: i64 = row.get(4)?;
1633        Ok(MemoryEvent::new(
1634            MemoryEventKind::ImportedUserPrompt,
1635            format!("claude-mem:user_prompts:{id}"),
1636            prompt_text,
1637        )
1638        .with_session_id(session_id)
1639        .with_observed_at_unix(created_at_epoch)
1640        .with_import("claude-mem", format!("user_prompts:{id}"))
1641        .with_metadata("prompt_number", prompt_number.to_string()))
1642    })?;
1643    collect_rows(rows)
1644}
1645
/// Returns the SQL fragment appended to a claude-mem read query: a
/// `LIMIT ?1` clause on its own line when a limit is given, otherwise an
/// empty string. Only the presence of the limit matters; its value is bound
/// separately.
fn claude_mem_limit_clause(limit: Option<usize>) -> &'static str {
    match limit {
        Some(_) => "\n        LIMIT ?1",
        None => "",
    }
}
1653
1654fn record_claude_mem_write(
1655    write_results: &mut BTreeMap<String, ClaudeMemTableWriteCounts>,
1656    event: &MemoryEvent,
1657    inserted: bool,
1658) {
1659    let Some(table) = claude_mem_event_table(event) else {
1660        return;
1661    };
1662    let counts = write_results.entry(table.to_string()).or_default();
1663    if inserted {
1664        counts.imported_events += 1;
1665    } else {
1666        counts.already_present_events += 1;
1667    }
1668}
1669
/// Per-table tally of write outcomes during a claude-mem import.
/// Both counters start at zero.
#[derive(Clone, Copy, Debug, Default)]
struct ClaudeMemTableWriteCounts {
    /// Events that were newly inserted into the target.
    imported_events: usize,
    /// Events whose insert reported no new row.
    already_present_events: usize,
}
1675
1676fn reconcile_claude_mem_import(
1677    plan: &ClaudeMemImportPlan,
1678    limit_per_table: Option<usize>,
1679    events: &[MemoryEvent],
1680    write_results: &BTreeMap<String, ClaudeMemTableWriteCounts>,
1681) -> ClaudeMemImportReconciliation {
1682    let event_counts = claude_mem_event_counts(events);
1683    let observations = reconcile_claude_mem_table(
1684        &plan.observations,
1685        limit_per_table,
1686        event_counts
1687            .get("observations")
1688            .copied()
1689            .unwrap_or_default(),
1690        write_results
1691            .get("observations")
1692            .copied()
1693            .unwrap_or_default(),
1694    );
1695    let session_summaries = reconcile_claude_mem_table(
1696        &plan.session_summaries,
1697        limit_per_table,
1698        event_counts
1699            .get("session_summaries")
1700            .copied()
1701            .unwrap_or_default(),
1702        write_results
1703            .get("session_summaries")
1704            .copied()
1705            .unwrap_or_default(),
1706    );
1707    let user_prompts = reconcile_claude_mem_table(
1708        &plan.user_prompts,
1709        limit_per_table,
1710        event_counts
1711            .get("user_prompts")
1712            .copied()
1713            .unwrap_or_default(),
1714        write_results
1715            .get("user_prompts")
1716            .copied()
1717            .unwrap_or_default(),
1718    );
1719    let total_source_rows =
1720        observations.source_rows + session_summaries.source_rows + user_prompts.source_rows;
1721    let total_read_events =
1722        observations.read_events + session_summaries.read_events + user_prompts.read_events;
1723    let total_planned_events = observations.planned_events
1724        + session_summaries.planned_events
1725        + user_prompts.planned_events;
1726    let total_imported_events = observations.imported_events
1727        + session_summaries.imported_events
1728        + user_prompts.imported_events;
1729    let total_already_present_events = observations.already_present_events
1730        + session_summaries.already_present_events
1731        + user_prompts.already_present_events;
1732    let total_skipped_source_rows = observations.skipped_source_rows
1733        + session_summaries.skipped_source_rows
1734        + user_prompts.skipped_source_rows;
1735    let complete = observations.complete && session_summaries.complete && user_prompts.complete;
1736    ClaudeMemImportReconciliation {
1737        observations,
1738        session_summaries,
1739        user_prompts,
1740        total_source_rows,
1741        total_read_events,
1742        total_planned_events,
1743        total_imported_events,
1744        total_already_present_events,
1745        total_skipped_source_rows,
1746        complete,
1747    }
1748}
1749
1750fn reconcile_claude_mem_table(
1751    table_plan: &ClaudeMemTablePlan,
1752    limit_per_table: Option<usize>,
1753    read_events: usize,
1754    write_counts: ClaudeMemTableWriteCounts,
1755) -> ClaudeMemTableReconciliation {
1756    let source_rows = table_plan.rows;
1757    let skipped_source_rows = source_rows.saturating_sub(read_events);
1758    ClaudeMemTableReconciliation {
1759        table: table_plan.table.clone(),
1760        source_rows,
1761        read_events,
1762        planned_events: read_events,
1763        imported_events: write_counts.imported_events,
1764        already_present_events: write_counts.already_present_events,
1765        skipped_source_rows,
1766        limit_per_table,
1767        complete: skipped_source_rows == 0,
1768    }
1769}
1770
1771fn claude_mem_event_counts(events: &[MemoryEvent]) -> BTreeMap<String, usize> {
1772    let mut counts = BTreeMap::new();
1773    for event in events {
1774        if let Some(table) = claude_mem_event_table(event) {
1775            *counts.entry(table.to_string()).or_insert(0) += 1;
1776        }
1777    }
1778    counts
1779}
1780
1781fn claude_mem_event_table(event: &MemoryEvent) -> Option<&str> {
1782    if event.imported_from.as_deref() != Some("claude-mem") {
1783        return None;
1784    }
1785    event
1786        .imported_id
1787        .as_deref()?
1788        .split_once(':')
1789        .map(|(table, _)| table)
1790}
1791
1792fn collect_rows<T>(rows: impl Iterator<Item = rusqlite::Result<T>>) -> Result<Vec<T>> {
1793    let mut values = Vec::new();
1794    for row in rows {
1795        values.push(row?);
1796    }
1797    Ok(values)
1798}
1799
/// Join the present, non-blank values with a blank line (`"\n\n"`) between
/// them. Each value is trimmed first; `None` and whitespace-only values are
/// dropped. Returns an empty string when nothing survives.
fn join_non_empty(values: impl IntoIterator<Item = Option<String>>) -> String {
    let mut joined = String::new();
    for value in values.into_iter().flatten() {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            continue;
        }
        // Every appended part is non-empty, so a non-empty buffer means at
        // least one part precedes this one and a separator is required.
        if !joined.is_empty() {
            joined.push_str("\n\n");
        }
        joined.push_str(trimmed);
    }
    joined
}
1809
1810/// Authored finding-graph node kinds (`#trt1`): human/agent-authored knowledge
1811/// anchored to code, distinct from passively-captured `MemoryEvent`s.
1812#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1813pub enum AuthoredNodeKind {
1814    Finding,
1815    Decision,
1816    Note,
1817}
1818
1819impl AuthoredNodeKind {
1820    pub fn as_str(self) -> &'static str {
1821        match self {
1822            Self::Finding => "finding",
1823            Self::Decision => "decision",
1824            Self::Note => "note",
1825        }
1826    }
1827
1828    pub fn parse(raw: &str) -> Result<Self> {
1829        match raw {
1830            "finding" => Ok(Self::Finding),
1831            "decision" => Ok(Self::Decision),
1832            "note" => Ok(Self::Note),
1833            other => {
1834                bail!("unsupported authored node kind `{other}` (expected finding|decision|note)")
1835            }
1836        }
1837    }
1838}
1839
1840/// Build a `GraphProjection` for an authored finding/decision/note node
1841/// (`#trt1`), anchored to a stable symbol handle (graph node id / tagpath — NOT
1842/// a line number) via an `annotates` edge. `confidence` is clamped to `0..=1`;
1843/// `observed_at_unix` is the freshness stamp. The node id is content-stable, so
1844/// re-authoring the same text on the same anchor dedupes instead of duplicating.
1845pub fn authored_node_projection(
1846    kind: AuthoredNodeKind,
1847    text: &str,
1848    anchor_handle: &str,
1849    confidence: f64,
1850    observed_at_unix: i64,
1851    session_id: Option<&str>,
1852) -> GraphProjection {
1853    let mut projection = GraphProjection::default();
1854    let confidence = confidence.clamp(0.0, 1.0);
1855    let node_id = format!(
1856        "{}:{}",
1857        kind.as_str(),
1858        blake3::hash(format!("{}|{}|{}", kind.as_str(), anchor_handle, text).as_bytes()).to_hex()
1859    );
1860    let label: String = text.chars().take(80).collect();
1861    let mut node = GraphNode::new(&node_id, kind.as_str(), label)
1862        .with_property("text", text)
1863        .with_property("anchor_handle", anchor_handle)
1864        .with_property("confidence", format!("{confidence:.3}"))
1865        .with_property("observed_at_unix", observed_at_unix.to_string())
1866        .with_provenance(GraphProvenance::new("tsift-findings", anchor_handle))
1867        .with_freshness(GraphFreshness {
1868            content_hash: None,
1869            observed_at_unix: Some(observed_at_unix),
1870        });
1871    if let Some(session_id) = session_id {
1872        node = node.with_property("session_id", session_id);
1873    }
1874    projection.nodes.push(node);
1875    projection.edges.push(
1876        GraphEdge::new(node_id, anchor_handle, "annotates")
1877            .with_property("authored_kind", kind.as_str())
1878            .with_provenance(GraphProvenance::new("tsift-findings", anchor_handle)),
1879    );
1880    projection
1881}
1882
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create a scratch directory and the path of a file named `file_name`
    /// inside it. The returned `TempDir` must stay bound for as long as the
    /// path is in use.
    fn temp_db_path(file_name: &str) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(file_name);
        (dir, path)
    }

    /// Budget shared by the budget-guard tests.
    fn guard_test_budget() -> MemoryBudget {
        MemoryBudget {
            max_prompt_tokens: 1000,
            reserve_tokens: 100,
            max_event_tokens: 400,
        }
    }

    /// Populate `conn` with the three supported claude-mem tables:
    /// 2 observations, 1 session summary, and 3 user prompts (6 rows total).
    fn create_supported_claude_mem_fixture(conn: &Connection) {
        conn.execute_batch(
            r#"
            CREATE TABLE observations (
              id INTEGER PRIMARY KEY,
              memory_session_id TEXT NOT NULL,
              project TEXT NOT NULL,
              type TEXT NOT NULL,
              title TEXT,
              subtitle TEXT,
              text TEXT,
              facts TEXT,
              narrative TEXT,
              concepts TEXT,
              prompt_number INTEGER,
              discovery_tokens INTEGER NOT NULL,
              created_at_epoch INTEGER NOT NULL,
              content_hash TEXT
            );
            CREATE TABLE session_summaries (
              id INTEGER PRIMARY KEY,
              memory_session_id TEXT NOT NULL,
              project TEXT NOT NULL,
              request TEXT,
              investigated TEXT,
              learned TEXT,
              completed TEXT,
              next_steps TEXT,
              notes TEXT,
              prompt_number INTEGER,
              discovery_tokens INTEGER NOT NULL,
              created_at_epoch INTEGER NOT NULL
            );
            CREATE TABLE user_prompts (
              id INTEGER PRIMARY KEY,
              content_session_id TEXT NOT NULL,
              prompt_number INTEGER NOT NULL,
              prompt_text TEXT NOT NULL,
              created_at_epoch INTEGER NOT NULL
            );
            INSERT INTO observations (
              id, memory_session_id, project, type, title, subtitle, text, facts,
              narrative, concepts, prompt_number, discovery_tokens, created_at_epoch,
              content_hash
            ) VALUES
              (1, 'session-a', 'agent-loop', 'fact', 'Title A', NULL, 'Text A', NULL, NULL, 'tsift', 1, 42, 1700000001, 'hash-a'),
              (2, 'session-b', 'agent-loop', 'fact', 'Title B', NULL, 'Text B', NULL, NULL, 'memory', 2, 43, 1700000002, 'hash-b');
            INSERT INTO session_summaries (
              id, memory_session_id, project, request, investigated, learned,
              completed, next_steps, notes, prompt_number, discovery_tokens,
              created_at_epoch
            ) VALUES (
              1, 'session-a', 'agent-loop', 'replace claude-mem', 'tables',
              'all rows', 'imported', 'refresh graph', NULL, 3, 44, 1700000003
            );
            INSERT INTO user_prompts (
              id, content_session_id, prompt_number, prompt_text, created_at_epoch
            ) VALUES
              (1, 'session-a', 1, 'first prompt', 1700000004),
              (2, 'session-a', 2, 'second prompt', 1700000005),
              (3, 'session-b', 1, 'third prompt', 1700000006);
            "#,
        )
        .unwrap();
    }

    /// An event larger than `max_event_tokens` is deferred and the plan asks
    /// for a split, while the small event is still included.
    #[test]
    fn handoff_plan_defers_oversized_events_before_model_call() {
        let budget = MemoryBudget {
            max_prompt_tokens: 1000,
            reserve_tokens: 100,
            max_event_tokens: 500,
        };
        let events = [
            MemoryEvent::new(MemoryEventKind::PromptTarget, "session.md", "short"),
            MemoryEvent::new(
                MemoryEventKind::ToolResultArtifact,
                "artifact:log",
                "x".repeat(10_000),
            ),
        ];

        let plan = plan_capture_handoff(&events, budget);

        assert_eq!(plan.status, "split_required");
        assert_eq!(plan.included_events.len(), 1);
        assert_eq!(
            plan.deferred_events[0].reason,
            "event_exceeds_max_event_tokens"
        );
    }

    /// The projection has one node and one `annotates` edge to the anchor,
    /// clamps confidence, and yields the same node id for the same
    /// kind + anchor + text.
    #[test]
    fn authored_node_projection_anchors_and_dedupes() {
        let text = "decay ranking ships in tsift-memory";
        let anchor = "symbol:rank_memory_events";

        let projection = authored_node_projection(
            AuthoredNodeKind::Finding,
            text,
            anchor,
            1.7, // out of range -> clamps to 1.0
            1_700_000_000,
            Some("sess-1"),
        );
        assert_eq!(projection.nodes.len(), 1);
        assert_eq!(projection.edges.len(), 1);

        let node = &projection.nodes[0];
        let edge = &projection.edges[0];
        assert_eq!(node.kind, "finding");
        assert_eq!(node.properties.get("confidence").unwrap(), "1.000");
        assert_eq!(
            node.properties.get("anchor_handle").unwrap(),
            "symbol:rank_memory_events"
        );
        assert_eq!(edge.kind, "annotates");
        assert_eq!(edge.to_id, "symbol:rank_memory_events");
        assert_eq!(
            node.freshness.as_ref().unwrap().observed_at_unix,
            Some(1_700_000_000)
        );

        // Same kind+anchor+text -> identical (deduping) node id, even though
        // confidence, timestamp, and session differ.
        let reauthored = authored_node_projection(
            AuthoredNodeKind::Finding,
            text,
            anchor,
            0.5,
            1_700_000_999,
            None,
        );
        assert_eq!(reauthored.nodes[0].id, node.id);
        assert!(AuthoredNodeKind::parse("note").is_ok());
        assert!(AuthoredNodeKind::parse("bogus").is_err());
    }

    /// Inserting the same imported event twice returns the same id and keeps
    /// a single stored row.
    #[test]
    fn memory_store_initializes_schema_and_dedupes_imported_events() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let imported = MemoryEvent::new(
            MemoryEventKind::ImportedObservation,
            "claude-mem:observations:1",
            "fact",
        )
        .with_session_id("session-a")
        .with_import("claude-mem", "observations:1");

        let first_id = store.insert_event(&imported).unwrap();
        let second_id = store.insert_event(&imported).unwrap();

        assert_eq!(first_id, second_id);
        assert_eq!(store.event_count().unwrap(), 1);
    }

    /// `pending_messages` rows are counted and flagged as unsupported in the
    /// plan, but reading events produces nothing from them.
    #[test]
    fn claude_mem_pending_messages_are_reported_but_not_imported() {
        let (_dir, db) = temp_db_path("claude-mem.db");
        let conn = Connection::open(&db).unwrap();
        conn.execute_batch(
            r#"
            CREATE TABLE pending_messages (
              id INTEGER PRIMARY KEY,
              session_db_id INTEGER NOT NULL,
              content_session_id TEXT NOT NULL,
              message_type TEXT NOT NULL CHECK(message_type IN ('observation', 'summarize')),
              tool_name TEXT,
              tool_input TEXT,
              tool_response TEXT,
              cwd TEXT,
              last_user_message TEXT,
              last_assistant_message TEXT,
              prompt_number INTEGER,
              status TEXT NOT NULL DEFAULT 'pending',
              retry_count INTEGER NOT NULL DEFAULT 0,
              created_at_epoch INTEGER NOT NULL,
              started_processing_at_epoch INTEGER,
              completed_at_epoch INTEGER,
              failed_at_epoch INTEGER
            );
            INSERT INTO pending_messages (
              id, session_db_id, content_session_id, message_type, tool_name,
              tool_input, tool_response, cwd, last_user_message,
              last_assistant_message, prompt_number, status, retry_count,
              created_at_epoch
            ) VALUES (
              1, 10, 'session-a', 'observation', 'bash',
              '{"cmd":"cat large.log"}', 'raw tool response', '/repo',
              'run tests', 'done', 7, 'pending', 0, 1700000000
            );
            "#,
        )
        .unwrap();

        let plan = inspect_claude_mem(&db).unwrap();
        let pending = &plan.pending_messages;
        assert_eq!(pending.rows, 1);
        assert!(!pending.supported);
        assert_eq!(
            pending.unsupported_reason.as_deref(),
            Some(PENDING_MESSAGES_EXCLUSION_REASON)
        );
        assert!(
            plan.warnings
                .iter()
                .any(|warning| warning.contains("pending_messages"))
        );

        let read_report = read_claude_mem_events(&db, Some(100)).unwrap();
        assert!(read_report.events.is_empty());
    }

    /// An unlimited import reads all six fixture rows, writes them once, and
    /// reports every row as already present on a second run.
    #[test]
    fn claude_mem_import_all_reconciles_supported_table_counts() {
        let dir = TempDir::new().unwrap();
        let source = dir.path().join("claude-mem.db");
        let target = dir.path().join("memory.db");
        let conn = Connection::open(&source).unwrap();
        create_supported_claude_mem_fixture(&conn);

        let dry_run = import_claude_mem(&source, &target, None, true).unwrap();
        assert!(dry_run.import_all);
        let planned = &dry_run.reconciliation;
        assert!(planned.complete);
        assert_eq!(planned.total_source_rows, 6);
        assert_eq!(planned.total_read_events, 6);
        assert_eq!(planned.total_skipped_source_rows, 0);
        assert_eq!(planned.observations.source_rows, 2);
        assert_eq!(planned.session_summaries.source_rows, 1);
        assert_eq!(planned.user_prompts.source_rows, 3);

        let applied = import_claude_mem(&source, &target, None, false).unwrap();
        assert_eq!(applied.planned_events, 6);
        assert_eq!(applied.imported_events, 6);
        assert_eq!(applied.already_present_events, 0);
        assert_eq!(applied.event_ids_total, 6);
        assert_eq!(applied.event_ids.len(), 6);
        assert!(!applied.event_ids_truncated);
        let stored_events = MemoryStore::open_or_create(&target)
            .unwrap()
            .event_count()
            .unwrap();
        assert_eq!(stored_events, 6);

        let second_apply = import_claude_mem(&source, &target, None, false).unwrap();
        assert_eq!(second_apply.imported_events, 0);
        assert_eq!(second_apply.already_present_events, 6);
        assert_eq!(second_apply.reconciliation.total_already_present_events, 6);
    }

    /// With a per-table limit of 1, one row per table is read and the
    /// remaining rows are reported as skipped.
    #[test]
    fn claude_mem_limited_import_reports_skipped_source_rows() {
        let (_dir, source) = temp_db_path("claude-mem.db");
        let conn = Connection::open(&source).unwrap();
        create_supported_claude_mem_fixture(&conn);

        let read_report = read_claude_mem_events(&source, Some(1)).unwrap();
        assert!(!read_report.import_all);

        let reconciliation = &read_report.reconciliation;
        assert!(!reconciliation.complete);
        assert_eq!(reconciliation.total_source_rows, 6);
        assert_eq!(reconciliation.total_read_events, 3);
        assert_eq!(reconciliation.total_skipped_source_rows, 3);
        assert_eq!(reconciliation.observations.skipped_source_rows, 1);
        assert_eq!(reconciliation.session_summaries.skipped_source_rows, 0);
        assert_eq!(reconciliation.user_prompts.skipped_source_rows, 2);
    }

    /// Importing one more event than `MAX_IMPORT_EVENT_IDS` truncates the
    /// reported id list while the totals still cover every event.
    #[test]
    fn claude_mem_import_caps_reported_event_ids() {
        let dir = TempDir::new().unwrap();
        let source = dir.path().join("claude-mem.db");
        let target = dir.path().join("memory.db");
        let conn = Connection::open(&source).unwrap();
        conn.execute_batch(
            r#"
            CREATE TABLE user_prompts (
              id INTEGER PRIMARY KEY,
              content_session_id TEXT NOT NULL,
              prompt_number INTEGER NOT NULL,
              prompt_text TEXT NOT NULL,
              created_at_epoch INTEGER NOT NULL
            );
            "#,
        )
        .unwrap();
        let prompt_rows = MAX_IMPORT_EVENT_IDS + 1;
        for row_id in 1..=prompt_rows {
            conn.execute(
                "INSERT INTO user_prompts (id, content_session_id, prompt_number, prompt_text, created_at_epoch) VALUES (?1, 'session-a', ?2, ?3, ?4)",
                params![
                    row_id as i64,
                    row_id as i64,
                    format!("prompt {row_id}"),
                    1700000000_i64 + row_id as i64
                ],
            )
            .unwrap();
        }

        let applied = import_claude_mem(&source, &target, None, false).unwrap();
        assert_eq!(applied.planned_events, MAX_IMPORT_EVENT_IDS + 1);
        assert_eq!(applied.event_ids_total, MAX_IMPORT_EVENT_IDS + 1);
        assert_eq!(applied.event_ids.len(), MAX_IMPORT_EVENT_IDS);
        assert!(applied.event_ids_truncated);
        assert!(applied.reconciliation.complete);
    }

    /// Closeout events written to the store come back from
    /// `read_memory_events` with their kind, text, session id, and metadata.
    #[test]
    fn read_memory_events_round_trips_closeout_events() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let closeout_events = agent_doc_closeout_events(
            Path::new("tasks/software/tsift.md"),
            "do [#tsiftmemhooks]",
            "wired closeout capture",
            Some("abc123"),
            "ok",
        );
        for event in closeout_events {
            store.insert_event(&event).unwrap();
        }

        let events = read_memory_events(&db, 10).unwrap();
        let has_prompt_target = events.iter().any(|event| {
            event.kind == MemoryEventKind::PromptTarget
                && event.text == "do [#tsiftmemhooks]"
                && event.session_id.as_deref() == Some("tasks/software/tsift.md")
        });
        assert!(has_prompt_target);
        let has_closeout_proof = events.iter().any(|event| {
            event.kind == MemoryEventKind::CloseoutProof
                && event.metadata.get("commit_hash") == Some(&"abc123".to_string())
        });
        assert!(has_closeout_proof);
    }

    /// The legacy reader and `oldest_first` return the older event first;
    /// `recent_first` returns the newer one.
    #[test]
    fn read_memory_events_with_policy_orders_explicitly() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let older = MemoryEvent::new(MemoryEventKind::ResponseSummary, "old", "old note")
            .with_observed_at_unix(1_700_000_000);
        store.insert_event(&older).unwrap();
        let newer = MemoryEvent::new(MemoryEventKind::ResponseSummary, "new", "new note")
            .with_observed_at_unix(1_700_000_100);
        store.insert_event(&newer).unwrap();

        let legacy = read_memory_events(&db, 1).unwrap();
        assert_eq!(legacy[0].source_ref, "old");
        let recent =
            read_memory_events_with_policy(&db, &MemoryReadPolicy::recent_first(), 1).unwrap();
        assert_eq!(recent[0].source_ref, "new");
        let oldest =
            read_memory_events_with_policy(&db, &MemoryReadPolicy::oldest_first(), 1).unwrap();
        assert_eq!(oldest[0].source_ref, "old");
    }

    /// A blank query is rejected by the query-relevant policy, and the read
    /// watermark differs between a query-relevant and a recent-first read.
    #[test]
    fn query_relevant_read_policy_requires_query_and_hashes_selection() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let needle = MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "needle",
            "semantic graph retrieval",
        )
        .with_observed_at_unix(1_700_000_000);
        store.insert_event(&needle).unwrap();
        let fresh = MemoryEvent::new(MemoryEventKind::ResponseSummary, "recent", "fresh note")
            .with_observed_at_unix(1_700_000_100);
        store.insert_event(&fresh).unwrap();

        let missing_query =
            read_memory_events_with_policy(&db, &MemoryReadPolicy::query_relevant(" "), 2);
        assert!(missing_query.is_err());

        let policy = MemoryReadPolicy::query_relevant("semantic graph");
        let events = read_memory_events_with_policy(&db, &policy, 2).unwrap();
        assert!(events.iter().any(|event| event.source_ref == "needle"));
        let watermark = memory_read_watermark(&db, &policy, 2, &events).unwrap();
        assert_eq!(watermark.events_available, 2);
        assert_eq!(watermark.events_read, events.len());
        assert!(!watermark.content_hash.is_empty());
        assert!(!watermark.source_watermark.is_empty());

        let recent_policy = MemoryReadPolicy::recent_first();
        let recent = read_memory_events_with_policy(&db, &recent_policy, 1).unwrap();
        let recent_watermark = memory_read_watermark(&db, &recent_policy, 1, &recent).unwrap();
        assert_ne!(watermark.content_hash, recent_watermark.content_hash);
        assert_ne!(
            watermark.source_watermark,
            recent_watermark.source_watermark
        );
    }

    /// Opening a store creates the two candidate indexes and the FTS table.
    #[test]
    fn memory_schema_creates_candidate_indexes_and_fts_table() {
        let (_dir, db) = temp_db_path("memory.db");
        MemoryStore::open_or_create(&db).unwrap();

        let conn = Connection::open(&db).unwrap();
        let expected_objects = [
            "idx_memory_events_observed_created",
            "idx_memory_events_created_at",
            "memory_events_fts",
        ];
        for name in expected_objects {
            let exists: i64 = conn
                .query_row(
                    "SELECT COUNT(*) FROM sqlite_master WHERE name = ?1",
                    [name],
                    |row| row.get(0),
                )
                .unwrap();
            assert_eq!(exists, 1, "expected schema object {name}");
        }
    }

    /// Candidate selection stays within the limit and includes both the old
    /// query match and the most recent unrelated event.
    #[test]
    fn read_memory_event_candidates_uses_fts_and_recent_bound() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let now = 1_700_000_000;
        for index in 0..20 {
            let filler = MemoryEvent::new(
                MemoryEventKind::ResponseSummary,
                format!("old-{index}"),
                format!("ordinary memory event {index}"),
            )
            .with_observed_at_unix(now - 1_000 - index);
            store.insert_event(&filler).unwrap();
        }
        let needle = MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "needle",
            "semantic needle graph retrieval",
        )
        .with_observed_at_unix(now - 10_000);
        store.insert_event(&needle).unwrap();
        let fresh = MemoryEvent::new(
            MemoryEventKind::ResponseSummary,
            "recent",
            "fresh unrelated release note",
        )
        .with_observed_at_unix(now);
        store.insert_event(&fresh).unwrap();

        let candidates = read_memory_event_candidates(&db, "semantic needle", 5).unwrap();
        assert!(candidates.len() <= 5);
        assert!(candidates.iter().any(|event| event.source_ref == "needle"));
        assert!(candidates.iter().any(|event| event.source_ref == "recent"));
    }

    /// A whitespace-only query returns the most recent event.
    #[test]
    fn read_memory_event_candidates_returns_recent_for_empty_query() {
        let (_dir, db) = temp_db_path("memory.db");
        let store = MemoryStore::open_or_create(&db).unwrap();
        let older = MemoryEvent::new(MemoryEventKind::ResponseSummary, "old", "older note")
            .with_observed_at_unix(1_700_000_000);
        store.insert_event(&older).unwrap();
        let newer = MemoryEvent::new(MemoryEventKind::ResponseSummary, "recent", "newer note")
            .with_observed_at_unix(1_700_000_100);
        store.insert_event(&newer).unwrap();

        let candidates = read_memory_event_candidates(&db, " ", 1).unwrap();
        assert_eq!(candidates.len(), 1);
        assert_eq!(candidates[0].source_ref, "recent");
    }

    /// An oversized tool result is blocked, with a replacement and a
    /// multi-chunk retry plan whose chunks each fit `max_event_tokens`.
    #[test]
    fn budget_guard_fails_closed_with_retryable_chunks() {
        let oversized = MemoryBudgetGuardInput::new("tool.log", "tool_result", "x".repeat(5_000));
        let report = guard_memory_handoff(oversized, guard_test_budget());

        assert!(!report.allowed);
        assert_eq!(report.status, "blocked_split_required");
        assert!(report.replacement.is_some());
        assert!(report.retryable_chunk_plan.len() > 1);
        assert!(
            report
                .retryable_chunk_plan
                .iter()
                .all(|chunk| chunk.token_estimate <= 400)
        );
    }

    /// An oversized transcript gets a replacement pointing at the
    /// session-review and context-pack commands.
    #[test]
    fn budget_guard_replaces_transcripts_with_session_review_commands() {
        let transcript =
            MemoryBudgetGuardInput::new("session.jsonl", "transcript", "x".repeat(5_000));
        let report = guard_memory_handoff(transcript, guard_test_budget());

        let replacement = report.replacement.unwrap();
        assert_eq!(
            replacement.strategy,
            "replace_raw_transcript_with_session_review_or_context_pack_handle"
        );
        assert!(
            replacement
                .session_review_command
                .contains("session-review")
        );
        assert!(replacement.context_command.contains("context-pack"));
    }
}