codex_memory/
models.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use uuid::Uuid;
4
// Note: memory tiering types were previously imported here. The memory tier system has been
// removed; this module is now a simple storage system.
7
8/// Enhanced memory structure for text storage with context and summary
9/// Implements encoding specificity principle (Tulving & Thomson, 1973)
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct Memory {
12    pub id: Uuid,
13    pub content: String,
14    pub content_hash: String, // Simple content hash for basic deduplication
15    pub context_fingerprint: String, // Context-aware fingerprint for encoding specificity
16    pub tags: Vec<String>,
17    pub context: String,           // Required: What is being stored and why
18    pub summary: String,           // Required: Brief summary (120 words or less)
19    pub chunk_index: Option<i32>,  // Index of this chunk (1-based, null for non-chunked content)
20    pub total_chunks: Option<i32>, // Total number of chunks in the set
21    pub parent_id: Option<Uuid>,   // ID of the parent document (for linking chunks together)
22    pub created_at: DateTime<Utc>,
23    pub updated_at: DateTime<Utc>,
24}
25
26impl Memory {
27    /// Create a new memory with the given content
28    /// Implements encoding specificity principle - context matters for retrieval
29    pub fn new(
30        content: String,
31        context: String,
32        summary: String,
33        tags: Option<Vec<String>>,
34    ) -> Self {
35        use sha2::{Digest, Sha256};
36
37        // Simple content hash for basic deduplication
38        let mut content_hasher = Sha256::new();
39        content_hasher.update(content.as_bytes());
40        let content_hash = hex::encode(content_hasher.finalize());
41
42        // Context-aware fingerprint for encoding specificity
43        // Combines content + context + summary for semantic distinctiveness
44        let context_fingerprint =
45            Self::compute_context_fingerprint(&content, &context, &summary, &tags);
46
47        let now = Utc::now();
48        Self {
49            id: Uuid::new_v4(),
50            content,
51            content_hash,
52            context_fingerprint,
53            tags: tags.unwrap_or_default(),
54            context,
55            summary,
56            chunk_index: None,
57            total_chunks: None,
58            parent_id: None,
59            created_at: now,
60            updated_at: now,
61        }
62    }
63
64    /// Create a new chunked memory with parent reference
65    /// Preserves context specificity for chunked content
66    pub fn new_chunk(
67        content: String,
68        context: String,
69        summary: String,
70        tags: Option<Vec<String>>,
71        chunk_index: i32,
72        total_chunks: i32,
73        parent_id: Uuid,
74    ) -> Self {
75        use sha2::{Digest, Sha256};
76
77        // Simple content hash for basic deduplication
78        let mut content_hasher = Sha256::new();
79        content_hasher.update(content.as_bytes());
80        let content_hash = hex::encode(content_hasher.finalize());
81
82        // Context-aware fingerprint including chunk position
83        let context_fingerprint = Self::compute_context_fingerprint_chunk(
84            &content,
85            &context,
86            &summary,
87            &tags,
88            chunk_index,
89            total_chunks,
90            parent_id,
91        );
92
93        let now = Utc::now();
94        Self {
95            id: Uuid::new_v4(),
96            content,
97            content_hash,
98            context_fingerprint,
99            tags: tags.unwrap_or_default(),
100            context,
101            summary,
102            chunk_index: Some(chunk_index),
103            total_chunks: Some(total_chunks),
104            parent_id: Some(parent_id),
105            created_at: now,
106            updated_at: now,
107        }
108    }
109
110    /// Compute context-aware fingerprint implementing encoding specificity principle
111    /// Based on Tulving & Thomson (1973) - context at encoding affects retrieval success
112    fn compute_context_fingerprint(
113        content: &str,
114        context: &str,
115        summary: &str,
116        tags: &Option<Vec<String>>,
117    ) -> String {
118        use sha2::{Digest, Sha256};
119
120        let mut hasher = Sha256::new();
121
122        // Include content
123        hasher.update(content.as_bytes());
124        hasher.update(b"::CONTEXT::");
125
126        // Include context - this is the key for encoding specificity
127        hasher.update(context.as_bytes());
128        hasher.update(b"::SUMMARY::");
129
130        // Include summary for semantic distinctiveness
131        hasher.update(summary.as_bytes());
132        hasher.update(b"::TAGS::");
133
134        // Include tags for additional contextual cues
135        if let Some(tags) = tags {
136            let mut sorted_tags = tags.clone();
137            sorted_tags.sort(); // Ensure consistent ordering
138            for tag in sorted_tags {
139                hasher.update(tag.as_bytes());
140                hasher.update(b",");
141            }
142        }
143
144        hex::encode(hasher.finalize())
145    }
146
147    /// Compute context fingerprint for chunked content
148    /// Includes chunk position in context for proper retrieval cues
149    fn compute_context_fingerprint_chunk(
150        content: &str,
151        context: &str,
152        summary: &str,
153        tags: &Option<Vec<String>>,
154        chunk_index: i32,
155        total_chunks: i32,
156        parent_id: Uuid,
157    ) -> String {
158        use sha2::{Digest, Sha256};
159
160        let mut hasher = Sha256::new();
161
162        // Include all basic context elements
163        hasher.update(content.as_bytes());
164        hasher.update(b"::CONTEXT::");
165        hasher.update(context.as_bytes());
166        hasher.update(b"::SUMMARY::");
167        hasher.update(summary.as_bytes());
168        hasher.update(b"::CHUNK_CONTEXT::");
169
170        // Include chunk-specific context for retrieval cues
171        hasher.update(chunk_index.to_string().as_bytes());
172        hasher.update(b":");
173        hasher.update(total_chunks.to_string().as_bytes());
174        hasher.update(b"::");
175        hasher.update(parent_id.to_string().as_bytes());
176        hasher.update(b"::TAGS::");
177
178        // Include tags
179        if let Some(tags) = tags {
180            let mut sorted_tags = tags.clone();
181            sorted_tags.sort();
182            for tag in sorted_tags {
183                hasher.update(tag.as_bytes());
184                hasher.update(b",");
185            }
186        }
187
188        hex::encode(hasher.finalize())
189    }
190
191    /// Check semantic similarity between two memories
192    /// Implements transfer appropriate processing - matching encoding and retrieval contexts
193    pub fn is_semantically_similar(&self, other: &Memory, similarity_threshold: f64) -> bool {
194        // For now, implement simple context similarity
195        // In a full implementation, this would use embeddings
196        let content_similarity = self.simple_text_similarity(&self.content, &other.content);
197        let context_similarity = self.simple_text_similarity(&self.context, &other.context);
198
199        // Combined similarity with context weighting (encoding specificity)
200        let combined_similarity = (content_similarity * 0.6) + (context_similarity * 0.4);
201        combined_similarity >= similarity_threshold
202    }
203
204    /// Simple text similarity for semantic comparison
205    /// Production version would use proper embeddings
206    fn simple_text_similarity(&self, text1: &str, text2: &str) -> f64 {
207        use std::collections::HashSet;
208
209        let words1: HashSet<&str> = text1.split_whitespace().collect();
210        let words2: HashSet<&str> = text2.split_whitespace().collect();
211
212        let intersection = words1.intersection(&words2).count();
213        let union = words1.union(&words2).count();
214
215        if union == 0 {
216            0.0
217        } else {
218            intersection as f64 / union as f64
219        }
220    }
221}
222
223/// Simple storage statistics
224#[derive(Debug, Serialize, Deserialize)]
225pub struct StorageStats {
226    pub total_memories: i64,
227    pub table_size: String,
228    pub last_memory_created: Option<DateTime<Utc>>,
229}