codex_memory/models.rs

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// Note: the memory tier system has been removed - this is now a simple storage system

/// Enhanced memory structure for text storage with context and summary
/// Implements encoding specificity principle (Tulving & Thomson, 1973)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Memory {
    pub id: Uuid,
    pub content: String,
    pub content_hash: String, // Simple content hash for basic deduplication
    // Removed context_fingerprint - simplified to use only content_hash
    pub tags: Vec<String>,
    pub context: String,           // Required: What is being stored and why
    pub summary: String,           // Required: Brief summary (120 words or less)
    pub chunk_index: Option<i32>,  // Index of this chunk (1-based, null for non-chunked content)
    pub total_chunks: Option<i32>, // Total number of chunks in the set
    pub parent_id: Option<Uuid>,   // ID of the parent document (for linking chunks together)
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}

impl Memory {
    /// Create a new memory with the given content
    /// Implements encoding specificity principle - context matters for retrieval
    pub fn new(
        content: String,
        context: String,
        summary: String,
        tags: Option<Vec<String>>,
    ) -> Self {
        use sha2::{Digest, Sha256};

        // Simple content hash for basic deduplication
        let mut content_hasher = Sha256::new();
        content_hasher.update(content.as_bytes());
        let content_hash = hex::encode(content_hasher.finalize());

        let now = Utc::now();
        Self {
            id: Uuid::new_v4(),
            content,
            content_hash,
            tags: tags.unwrap_or_default(),
            context,
            summary,
            chunk_index: None,
            total_chunks: None,
            parent_id: None,
            created_at: now,
            updated_at: now,
        }
    }

    /// Create a new chunked memory with parent reference
    /// Preserves context specificity for chunked content
    pub fn new_chunk(
        content: String,
        context: String,
        summary: String,
        tags: Option<Vec<String>>,
        chunk_index: i32,
        total_chunks: i32,
        parent_id: Uuid,
    ) -> Self {
        use sha2::{Digest, Sha256};

        // Simple content hash for basic deduplication
        let mut content_hasher = Sha256::new();
        content_hasher.update(content.as_bytes());
        let content_hash = hex::encode(content_hasher.finalize());

        let now = Utc::now();
        Self {
            id: Uuid::new_v4(),
            content,
            content_hash,
            tags: tags.unwrap_or_default(),
            context,
            summary,
            chunk_index: Some(chunk_index),
            total_chunks: Some(total_chunks),
            parent_id: Some(parent_id),
            created_at: now,
            updated_at: now,
        }
    }

    // Removed complex context fingerprint - simplified system uses only content_hash

    // Removed chunk context fingerprint - simplified system

    /// Check semantic similarity between two memories
    /// Implements transfer appropriate processing - matching encoding and retrieval contexts
    pub fn is_semantically_similar(&self, other: &Memory, similarity_threshold: f64) -> bool {
        // For now, use simple word-overlap similarity over content and context
        // A full implementation would use embeddings
        let content_similarity = self.simple_text_similarity(&self.content, &other.content);
        let context_similarity = self.simple_text_similarity(&self.context, &other.context);

        // Combined similarity with context weighting (encoding specificity)
        let combined_similarity = (content_similarity * 0.6) + (context_similarity * 0.4);
        combined_similarity >= similarity_threshold
    }

    /// Simple text similarity (Jaccard overlap of whitespace-separated word sets)
    /// Production version would use proper embeddings
    fn simple_text_similarity(&self, text1: &str, text2: &str) -> f64 {
        use std::collections::HashSet;

        let words1: HashSet<&str> = text1.split_whitespace().collect();
        let words2: HashSet<&str> = text2.split_whitespace().collect();

        let intersection = words1.intersection(&words2).count();
        let union = words1.union(&words2).count();

        if union == 0 {
            0.0
        } else {
            intersection as f64 / union as f64
        }
    }
}
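// Illustrative usage sketch (not part of the original module): these tests exercise
// the constructors and the similarity check above. The module and test names, sample
// strings, and tag values are placeholders chosen for this example.
#[cfg(test)]
mod memory_examples {
    use super::*;

    #[test]
    fn new_memory_hashes_content_and_leaves_chunk_fields_empty() {
        let a = Memory::new(
            "the quick brown fox".to_string(),
            "animal facts".to_string(),
            "a fox sentence".to_string(),
            None,
        );
        let b = Memory::new(
            "the quick brown fox".to_string(),
            "animal facts".to_string(),
            "a fox sentence".to_string(),
            None,
        );
        // Identical content yields an identical SHA-256 hash (basis for deduplication).
        assert_eq!(a.content_hash, b.content_hash);
        // Non-chunked memories leave the chunk-related fields unset.
        assert!(a.chunk_index.is_none() && a.total_chunks.is_none() && a.parent_id.is_none());
    }

    #[test]
    fn new_chunk_links_back_to_its_parent() {
        let parent_id = Uuid::new_v4();
        let chunk = Memory::new_chunk(
            "chunk body".to_string(),
            "part of a larger doc".to_string(),
            "first of three chunks".to_string(),
            Some(vec!["docs".to_string()]),
            1,
            3,
            parent_id,
        );
        assert_eq!(chunk.chunk_index, Some(1));
        assert_eq!(chunk.total_chunks, Some(3));
        assert_eq!(chunk.parent_id, Some(parent_id));
    }

    #[test]
    fn similarity_combines_content_and_context_overlap() {
        let a = Memory::new("alpha beta gamma".into(), "greek letters".into(), "letters".into(), None);
        let b = Memory::new("alpha beta gamma".into(), "greek letters".into(), "letters".into(), None);
        let c = Memory::new("one two three".into(), "numbers".into(), "digits".into(), None);
        // Identical content and context -> combined score 1.0, above the 0.7 threshold.
        assert!(a.is_semantically_similar(&b, 0.7));
        // Disjoint word sets -> combined score 0.0, below any positive threshold.
        assert!(!a.is_semantically_similar(&c, 0.7));
    }
}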

/// Simple storage statistics
#[derive(Debug, Serialize, Deserialize)]
pub struct StorageStats {
    pub total_memories: i64,
    pub table_size: String,
    pub last_memory_created: Option<DateTime<Utc>>,
}

/// Search metadata indicating which search stage produced results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchMetadata {
    pub stage_used: u8,
    pub stage_description: String,
    pub threshold_used: f64,
    pub total_results: usize,
}

/// Combined search results with metadata
#[derive(Debug, Clone)]
pub struct SearchResultWithMetadata {
    pub results: Vec<SearchResult>,
    pub metadata: SearchMetadata,
}

/// Search parameters for semantic similarity search
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchParams {
    pub query: String,
    pub tag_filter: Option<Vec<String>>,
    pub use_tag_embedding: bool,
    pub use_content_embedding: bool,
    pub similarity_threshold: f64,
    pub max_results: usize,
    pub search_strategy: SearchStrategy,
    pub boost_recent: bool,
    pub tag_weight: f64,
    pub content_weight: f64,
}

impl Default for SearchParams {
    fn default() -> Self {
        Self {
            query: String::new(),
            tag_filter: None,
            use_tag_embedding: true,
            use_content_embedding: true,
            similarity_threshold: 0.7,
            max_results: 10,
            search_strategy: SearchStrategy::Hybrid,
            boost_recent: false,
            tag_weight: 0.4,
            content_weight: 0.6,
        }
    }
}
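// Illustrative sketch of how a caller might build SearchParams: start from the defaults
// above and override individual fields with struct-update syntax. The module name, query
// text, and override values here are placeholders, not part of the original file.
#[cfg(test)]
mod search_params_examples {
    use super::*;

    #[test]
    fn defaults_can_be_selectively_overridden() {
        let params = SearchParams {
            query: "rust async runtime".to_string(),
            max_results: 25,
            search_strategy: SearchStrategy::TagsFirst,
            ..SearchParams::default()
        };
        // Unspecified fields keep the documented defaults.
        assert_eq!(params.similarity_threshold, 0.7);
        assert_eq!(params.tag_weight, 0.4);
        assert_eq!(params.content_weight, 0.6);
        assert_eq!(params.max_results, 25);
    }
}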

/// Search strategy options
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SearchStrategy {
    TagsFirst,
    ContentFirst,
    Hybrid,
}

/// Search result with similarity scores
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    pub memory: Memory,
    pub tag_similarity: Option<f64>,
    pub content_similarity: Option<f64>,
    pub combined_score: f64,
    pub semantic_cluster: Option<i32>,
}

impl SearchResult {
    pub fn new(
        memory: Memory,
        tag_similarity: Option<f64>,
        content_similarity: Option<f64>,
        semantic_cluster: Option<i32>,
        tag_weight: f64,
        content_weight: f64,
    ) -> Self {
        let combined_score = Self::calculate_combined_score(
            tag_similarity,
            content_similarity,
            tag_weight,
            content_weight,
        );

        Self {
            memory,
            tag_similarity,
            content_similarity,
            combined_score,
            semantic_cluster,
        }
    }

    fn calculate_combined_score(
        tag_similarity: Option<f64>,
        content_similarity: Option<f64>,
        tag_weight: f64,
        content_weight: f64,
    ) -> f64 {
        let tag_score = tag_similarity.unwrap_or(0.0) * tag_weight;
        let content_score = content_similarity.unwrap_or(0.0) * content_weight;
        tag_score + content_score
    }
}
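// Worked example of the weighted scoring above (a sketch; the module name, helper, and
// similarity values are arbitrary samples): with a 0.4 tag weight and a 0.6 content weight,
// tag 0.5 and content 1.0 combine to 0.5*0.4 + 1.0*0.6 = 0.8, and a missing similarity
// simply contributes 0 to the score.
#[cfg(test)]
mod search_result_examples {
    use super::*;

    fn sample_memory() -> Memory {
        Memory::new(
            "sample content".to_string(),
            "example context".to_string(),
            "short summary".to_string(),
            None,
        )
    }

    #[test]
    fn combined_score_weights_tag_and_content_similarity() {
        let result = SearchResult::new(sample_memory(), Some(0.5), Some(1.0), None, 0.4, 0.6);
        assert!((result.combined_score - 0.8).abs() < 1e-12);
    }

    #[test]
    fn missing_similarity_contributes_zero() {
        let result = SearchResult::new(sample_memory(), None, Some(1.0), None, 0.4, 0.6);
        assert!((result.combined_score - 0.6).abs() < 1e-12);
    }
}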