tandem-memory 0.4.12

Memory storage and embedding utilities for Tandem
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
// Memory Context Types
// Type definitions and error types for the memory system

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// Memory tier - determines persistence level
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MemoryTier {
    /// Ephemeral session memory - cleared when session ends
    Session,
    /// Persistent project memory - survives across sessions
    Project,
    /// Cross-project global memory - user preferences and patterns
    Global,
}

impl MemoryTier {
    /// Get the table prefix for this tier
    pub fn table_prefix(&self) -> &'static str {
        match self {
            MemoryTier::Session => "session",
            MemoryTier::Project => "project",
            MemoryTier::Global => "global",
        }
    }
}

impl std::fmt::Display for MemoryTier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryTier::Session => write!(f, "session"),
            MemoryTier::Project => write!(f, "project"),
            MemoryTier::Global => write!(f, "global"),
        }
    }
}

/// A memory chunk - unit of storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryChunk {
    pub id: String,
    pub content: String,
    pub tier: MemoryTier,
    pub session_id: Option<String>,
    pub project_id: Option<String>,
    pub source: String, // e.g., "user_message", "assistant_response", "file_content"
    // File-derived fields (only set when source == "file")
    pub source_path: Option<String>,
    pub source_mtime: Option<i64>,
    pub source_size: Option<i64>,
    pub source_hash: Option<String>,
    pub created_at: DateTime<Utc>,
    pub token_count: i64,
    pub metadata: Option<serde_json::Value>,
}

/// Search result with similarity score
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemorySearchResult {
    pub chunk: MemoryChunk,
    pub similarity: f64,
}

/// Memory configuration for a project
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
    /// Maximum chunks to store per project
    pub max_chunks: i64,
    /// Chunk size in tokens
    pub chunk_size: i64,
    /// Number of chunks to retrieve
    pub retrieval_k: i64,
    /// Whether auto-cleanup is enabled
    pub auto_cleanup: bool,
    /// Session memory retention in days
    pub session_retention_days: i64,
    /// Token budget for memory context injection
    pub token_budget: i64,
    /// Overlap between chunks in tokens
    pub chunk_overlap: i64,
}

impl Default for MemoryConfig {
    fn default() -> Self {
        Self {
            max_chunks: 10_000,
            chunk_size: 512,
            retrieval_k: 5,
            auto_cleanup: true,
            session_retention_days: 30,
            token_budget: 5000,
            chunk_overlap: 64,
        }
    }
}

/// Memory storage statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryStats {
    /// Total number of chunks
    pub total_chunks: i64,
    /// Number of session chunks
    pub session_chunks: i64,
    /// Number of project chunks
    pub project_chunks: i64,
    /// Number of global chunks
    pub global_chunks: i64,
    /// Total size in bytes
    pub total_bytes: i64,
    /// Session memory size in bytes
    pub session_bytes: i64,
    /// Project memory size in bytes
    pub project_bytes: i64,
    /// Global memory size in bytes
    pub global_bytes: i64,
    /// Database file size in bytes
    pub file_size: i64,
    /// Last cleanup timestamp
    pub last_cleanup: Option<DateTime<Utc>>,
}

/// Context to inject into messages
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryContext {
    /// Recent messages from current session
    pub current_session: Vec<MemoryChunk>,
    /// Relevant historical chunks
    pub relevant_history: Vec<MemoryChunk>,
    /// Important project facts
    pub project_facts: Vec<MemoryChunk>,
    /// Total tokens in context
    pub total_tokens: i64,
}

/// Metadata describing how memory retrieval executed for a single query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryRetrievalMeta {
    pub used: bool,
    pub chunks_total: usize,
    pub session_chunks: usize,
    pub history_chunks: usize,
    pub project_fact_chunks: usize,
    pub score_min: Option<f64>,
    pub score_max: Option<f64>,
}

impl MemoryContext {
    /// Format the context for injection into a prompt
    pub fn format_for_injection(&self) -> String {
        let mut parts = Vec::new();

        if !self.current_session.is_empty() {
            parts.push("<current_session>".to_string());
            for chunk in &self.current_session {
                parts.push(format!("- {}", chunk.content));
            }
            parts.push("</current_session>".to_string());
        }

        if !self.relevant_history.is_empty() {
            parts.push("<relevant_history>".to_string());
            for chunk in &self.relevant_history {
                parts.push(format!("- {}", chunk.content));
            }
            parts.push("</relevant_history>".to_string());
        }

        if !self.project_facts.is_empty() {
            parts.push("<project_facts>".to_string());
            for chunk in &self.project_facts {
                parts.push(format!("- {}", chunk.content));
            }
            parts.push("</project_facts>".to_string());
        }

        if parts.is_empty() {
            String::new()
        } else {
            format!("<memory_context>\n{}\n</memory_context>", parts.join("\n"))
        }
    }
}

/// Request to store a message
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoreMessageRequest {
    pub content: String,
    pub tier: MemoryTier,
    pub session_id: Option<String>,
    pub project_id: Option<String>,
    pub source: String,
    // File-derived fields (only set when source == "file")
    pub source_path: Option<String>,
    pub source_mtime: Option<i64>,
    pub source_size: Option<i64>,
    pub source_hash: Option<String>,
    pub metadata: Option<serde_json::Value>,
}

/// Project-scoped memory statistics (filtered by project_id)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectMemoryStats {
    pub project_id: String,
    /// Total chunks stored under this project_id (all sources)
    pub project_chunks: i64,
    pub project_bytes: i64,
    /// Chunks/bytes that came from workspace file indexing (source == "file")
    pub file_index_chunks: i64,
    pub file_index_bytes: i64,
    /// Number of indexed files currently tracked for this project_id
    pub indexed_files: i64,
    /// Last time indexing completed for this project_id (if known)
    pub last_indexed_at: Option<DateTime<Utc>>,
    /// Last run totals (if known)
    pub last_total_files: Option<i64>,
    pub last_processed_files: Option<i64>,
    pub last_indexed_files: Option<i64>,
    pub last_skipped_files: Option<i64>,
    pub last_errors: Option<i64>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClearFileIndexResult {
    pub chunks_deleted: i64,
    pub bytes_estimated: i64,
    pub did_vacuum: bool,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MemoryImportFormat {
    Directory,
    Openclaw,
}

impl std::fmt::Display for MemoryImportFormat {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MemoryImportFormat::Directory => write!(f, "directory"),
            MemoryImportFormat::Openclaw => write!(f, "openclaw"),
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryImportRequest {
    pub root_path: String,
    pub format: MemoryImportFormat,
    pub tier: MemoryTier,
    pub session_id: Option<String>,
    pub project_id: Option<String>,
    pub sync_deletes: bool,
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MemoryImportStats {
    pub discovered_files: usize,
    pub files_processed: usize,
    pub indexed_files: usize,
    pub skipped_files: usize,
    pub deleted_files: usize,
    pub chunks_created: usize,
    pub errors: usize,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryImportProgress {
    pub files_processed: usize,
    pub total_files: usize,
    pub indexed_files: usize,
    pub skipped_files: usize,
    pub deleted_files: usize,
    pub errors: usize,
    pub chunks_created: usize,
    pub current_file: String,
}

/// Request to search memory
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchMemoryRequest {
    pub query: String,
    pub tier: Option<MemoryTier>,
    pub project_id: Option<String>,
    pub session_id: Option<String>,
    pub limit: Option<i64>,
}

/// Embedding backend health surfaced to UI/events.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingHealth {
    /// "ok" when embeddings are available, "degraded_disabled" otherwise.
    pub status: String,
    /// Optional reason when degraded.
    pub reason: Option<String>,
}

/// Memory error types
#[derive(Error, Debug)]
pub enum MemoryError {
    #[error("Database error: {0}")]
    Database(#[from] rusqlite::Error),

    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("Serialization error: {0}")]
    Serialization(#[from] serde_json::Error),

    #[error("Embedding error: {0}")]
    Embedding(String),

    #[error("Chunking error: {0}")]
    Chunking(String),

    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    #[error("Not found: {0}")]
    NotFound(String),

    #[error("Tokenization error: {0}")]
    Tokenization(String),

    #[error("Lock error: {0}")]
    Lock(String),
}

impl From<String> for MemoryError {
    fn from(err: String) -> Self {
        MemoryError::InvalidConfig(err)
    }
}

impl From<&str> for MemoryError {
    fn from(err: &str) -> Self {
        MemoryError::InvalidConfig(err.to_string())
    }
}

// Implement serialization for Tauri commands
impl serde::Serialize for MemoryError {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.to_string())
    }
}

pub type MemoryResult<T> = Result<T, MemoryError>;

/// Cleanup log entry for audit trail
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleanupLogEntry {
    pub id: String,
    pub cleanup_type: String,
    pub tier: MemoryTier,
    pub project_id: Option<String>,
    pub session_id: Option<String>,
    pub chunks_deleted: i64,
    pub bytes_reclaimed: i64,
    pub created_at: DateTime<Utc>,
}

/// Default embedding dimension for all-MiniLM-L6-v2
pub const DEFAULT_EMBEDDING_DIMENSION: usize = 384;

/// Default embedding model name
pub const DEFAULT_EMBEDDING_MODEL: &str = "all-MiniLM-L6-v2";

/// Maximum content length for a single chunk (in characters)
pub const MAX_CHUNK_LENGTH: usize = 4000;

/// Minimum content length for a chunk (in characters)
pub const MIN_CHUNK_LENGTH: usize = 50;

/// Persistent global memory record keyed by user identity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalMemoryRecord {
    pub id: String,
    pub user_id: String,
    pub source_type: String,
    pub content: String,
    pub content_hash: String,
    pub run_id: String,
    pub session_id: Option<String>,
    pub message_id: Option<String>,
    pub tool_name: Option<String>,
    pub project_tag: Option<String>,
    pub channel_tag: Option<String>,
    pub host_tag: Option<String>,
    pub metadata: Option<serde_json::Value>,
    pub provenance: Option<serde_json::Value>,
    pub redaction_status: String,
    pub redaction_count: u32,
    pub visibility: String,
    pub demoted: bool,
    pub score_boost: f64,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    pub expires_at_ms: Option<u64>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalMemoryWriteResult {
    pub id: String,
    pub stored: bool,
    pub deduped: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalMemorySearchHit {
    pub record: GlobalMemoryRecord,
    pub score: f64,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum NodeType {
    Directory,
    File,
}

impl std::fmt::Display for NodeType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            NodeType::Directory => write!(f, "directory"),
            NodeType::File => write!(f, "file"),
        }
    }
}

impl std::str::FromStr for NodeType {
    type Err = String;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "directory" => Ok(NodeType::Directory),
            "file" => Ok(NodeType::File),
            _ => Err(format!("unknown node type: {}", s)),
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum LayerType {
    L0,
    L1,
    L2,
}

impl std::fmt::Display for LayerType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LayerType::L0 => write!(f, "L0"),
            LayerType::L1 => write!(f, "L1"),
            LayerType::L2 => write!(f, "L2"),
        }
    }
}

impl std::str::FromStr for LayerType {
    type Err = String;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_uppercase().as_str() {
            "L0" | "L0_ABSTRACT" => Ok(LayerType::L0),
            "L1" | "L1_OVERVIEW" => Ok(LayerType::L1),
            "L2" | "L2_DETAIL" => Ok(LayerType::L2),
            _ => Err(format!("unknown layer type: {}", s)),
        }
    }
}

impl LayerType {
    pub fn default_tokens(&self) -> usize {
        match self {
            LayerType::L0 => 100,
            LayerType::L1 => 2000,
            LayerType::L2 => 4000,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalStep {
    pub step_type: String,
    pub description: String,
    pub layer_accessed: Option<LayerType>,
    pub nodes_evaluated: usize,
    pub scores: std::collections::HashMap<String, f64>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeVisit {
    pub uri: String,
    pub node_type: NodeType,
    pub score: f64,
    pub depth: usize,
    pub layer_loaded: Option<LayerType>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalTrajectory {
    pub id: String,
    pub query: String,
    pub root_uri: String,
    pub steps: Vec<RetrievalStep>,
    pub visited_nodes: Vec<NodeVisit>,
    pub total_duration_ms: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalResult {
    pub node_id: String,
    pub uri: String,
    pub content: String,
    pub layer_type: LayerType,
    pub score: f64,
    pub trajectory: RetrievalTrajectory,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryNode {
    pub id: String,
    pub uri: String,
    pub parent_uri: Option<String>,
    pub node_type: NodeType,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    pub metadata: Option<serde_json::Value>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryLayer {
    pub id: String,
    pub node_id: String,
    pub layer_type: LayerType,
    pub content: String,
    pub token_count: i64,
    pub embedding_id: Option<String>,
    pub created_at: DateTime<Utc>,
    pub source_chunk_id: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TreeNode {
    pub node: MemoryNode,
    pub children: Vec<TreeNode>,
    pub layer_summary: Option<LayerSummary>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LayerSummary {
    pub l0_preview: Option<String>,
    pub l1_preview: Option<String>,
    pub has_l2: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryListing {
    pub uri: String,
    pub nodes: Vec<MemoryNode>,
    pub total_children: usize,
    pub directories: Vec<MemoryNode>,
    pub files: Vec<MemoryNode>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistilledFact {
    pub id: String,
    pub distillation_id: String,
    pub content: String,
    pub category: FactCategory,
    pub importance_score: f64,
    pub source_message_ids: Vec<String>,
    pub contradicts_fact_id: Option<String>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FactCategory {
    UserPreference,
    TaskOutcome,
    Learning,
    Fact,
}

impl std::fmt::Display for FactCategory {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FactCategory::UserPreference => write!(f, "user_preference"),
            FactCategory::TaskOutcome => write!(f, "task_outcome"),
            FactCategory::Learning => write!(f, "learning"),
            FactCategory::Fact => write!(f, "fact"),
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistillationReport {
    pub distillation_id: String,
    pub session_id: String,
    pub distilled_at: DateTime<Utc>,
    pub facts_extracted: usize,
    pub importance_threshold: f64,
    pub user_memory_updated: bool,
    pub agent_memory_updated: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionDistillation {
    pub id: String,
    pub session_id: String,
    pub distilled_at: DateTime<Utc>,
    pub input_token_count: i64,
    pub output_memory_count: usize,
    pub key_facts_extracted: usize,
    pub importance_threshold: f64,
}