codex_memory/
models.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use sqlx::FromRow;
4use uuid::Uuid;
5
// Note: the memory tier system has been removed, so no tiering types are imported here.
// This module now models a simple storage system.
8
9/// Enhanced memory structure for text storage with context and summary
10/// Implements encoding specificity principle (Tulving & Thomson, 1973)
11#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
12pub struct Memory {
13    pub id: Uuid,
14    pub content: String,
15    pub content_hash: String, // Simple content hash for basic deduplication
16    // Removed context_fingerprint - simplified to use only content_hash
17    pub tags: Vec<String>,
18    pub context: String,           // Required: What is being stored and why
19    pub summary: String,           // Required: Brief summary (120 words or less)
20    pub chunk_index: Option<i32>,  // Index of this chunk (1-based, null for non-chunked content)
21    pub total_chunks: Option<i32>, // Total number of chunks in the set
22    pub parent_id: Option<Uuid>,   // ID of the parent document (for linking chunks together)
23    pub created_at: DateTime<Utc>,
24    pub updated_at: DateTime<Utc>,
25}
26
27impl Memory {
28    /// Create a new memory with the given content
29    /// Implements encoding specificity principle - context matters for retrieval
30    pub fn new(
31        content: String,
32        context: String,
33        summary: String,
34        tags: Option<Vec<String>>,
35    ) -> Self {
36        use sha2::{Digest, Sha256};
37
38        // Simple content hash for basic deduplication
39        let mut content_hasher = Sha256::new();
40        content_hasher.update(content.as_bytes());
41        let content_hash = hex::encode(content_hasher.finalize());
42
43        let now = Utc::now();
44        Self {
45            id: Uuid::new_v4(),
46            content,
47            content_hash,
48            tags: tags.unwrap_or_default(),
49            context,
50            summary,
51            chunk_index: None,
52            total_chunks: None,
53            parent_id: None,
54            created_at: now,
55            updated_at: now,
56        }
57    }
58
59    /// Create a new chunked memory with parent reference
60    /// Preserves context specificity for chunked content
61    pub fn new_chunk(
62        content: String,
63        context: String,
64        summary: String,
65        tags: Option<Vec<String>>,
66        chunk_index: i32,
67        total_chunks: i32,
68        parent_id: Uuid,
69    ) -> Self {
70        use sha2::{Digest, Sha256};
71
72        // Simple content hash for basic deduplication
73        let mut content_hasher = Sha256::new();
74        content_hasher.update(content.as_bytes());
75        let content_hash = hex::encode(content_hasher.finalize());
76
77        let now = Utc::now();
78        Self {
79            id: Uuid::new_v4(),
80            content,
81            content_hash,
82            tags: tags.unwrap_or_default(),
83            context,
84            summary,
85            chunk_index: Some(chunk_index),
86            total_chunks: Some(total_chunks),
87            parent_id: Some(parent_id),
88            created_at: now,
89            updated_at: now,
90        }
91    }
92
93    // Removed complex context fingerprint - simplified system uses only content_hash
94
95    // Removed chunk context fingerprint - simplified system
96
97    /// Check semantic similarity between two memories
98    /// Implements transfer appropriate processing - matching encoding and retrieval contexts
99    pub fn is_semantically_similar(&self, other: &Memory, similarity_threshold: f64) -> bool {
100        // For now, implement simple context similarity
101        // In a full implementation, this would use embeddings
102        let content_similarity = self.simple_text_similarity(&self.content, &other.content);
103        let context_similarity = self.simple_text_similarity(&self.context, &other.context);
104
105        // Combined similarity with context weighting (encoding specificity)
106        let combined_similarity = (content_similarity * 0.6) + (context_similarity * 0.4);
107        combined_similarity >= similarity_threshold
108    }
109
110    /// Simple text similarity for semantic comparison
111    /// Production version would use proper embeddings
112    fn simple_text_similarity(&self, text1: &str, text2: &str) -> f64 {
113        use std::collections::HashSet;
114
115        let words1: HashSet<&str> = text1.split_whitespace().collect();
116        let words2: HashSet<&str> = text2.split_whitespace().collect();
117
118        let intersection = words1.intersection(&words2).count();
119        let union = words1.union(&words2).count();
120
121        if union == 0 {
122            0.0
123        } else {
124            intersection as f64 / union as f64
125        }
126    }
127}
128
129/// Simple storage statistics
130#[derive(Debug, Serialize, Deserialize)]
131pub struct StorageStats {
132    pub total_memories: i64,
133    pub table_size: String,
134    pub last_memory_created: Option<DateTime<Utc>>,
135}
136
137/// Search metadata indicating which search stage produced results
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct SearchMetadata {
140    pub stage_used: u8,
141    pub stage_description: String,
142    pub threshold_used: f64,
143    pub total_results: usize,
144}
145
146/// Combined search results with metadata
147#[derive(Debug, Clone)]
148pub struct SearchResultWithMetadata {
149    pub results: Vec<SearchResult>,
150    pub metadata: SearchMetadata,
151}
152
153/// Search parameters for semantic similarity search
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct SearchParams {
156    pub query: String,
157    pub tag_filter: Option<Vec<String>>,
158    pub use_tag_embedding: bool,
159    pub use_content_embedding: bool,
160    pub similarity_threshold: f64,
161    pub max_results: usize,
162    pub search_strategy: SearchStrategy,
163    pub boost_recent: bool,
164    pub tag_weight: f64,
165    pub content_weight: f64,
166}
167
168impl Default for SearchParams {
169    fn default() -> Self {
170        Self {
171            query: String::new(),
172            tag_filter: None,
173            use_tag_embedding: true,
174            use_content_embedding: true,
175            similarity_threshold: 0.7,
176            max_results: 10,
177            search_strategy: SearchStrategy::Hybrid,
178            boost_recent: false,
179            tag_weight: 0.4,
180            content_weight: 0.6,
181        }
182    }
183}
184
185/// Search strategy options
186#[derive(Debug, Clone, Serialize, Deserialize)]
187pub enum SearchStrategy {
188    TagsFirst,
189    ContentFirst,
190    Hybrid,
191}
192
193/// Search result with similarity scores
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct SearchResult {
196    pub memory: Memory,
197    pub tag_similarity: Option<f64>,
198    pub content_similarity: Option<f64>,
199    pub combined_score: f64,
200    pub semantic_cluster: Option<i32>,
201}
202
203impl SearchResult {
204    pub fn new(
205        memory: Memory,
206        tag_similarity: Option<f64>,
207        content_similarity: Option<f64>,
208        semantic_cluster: Option<i32>,
209        tag_weight: f64,
210        content_weight: f64,
211    ) -> Self {
212        let combined_score = Self::calculate_combined_score(
213            tag_similarity,
214            content_similarity,
215            tag_weight,
216            content_weight,
217        );
218
219        Self {
220            memory,
221            tag_similarity,
222            content_similarity,
223            combined_score,
224            semantic_cluster,
225        }
226    }
227
228    fn calculate_combined_score(
229        tag_similarity: Option<f64>,
230        content_similarity: Option<f64>,
231        tag_weight: f64,
232        content_weight: f64,
233    ) -> f64 {
234        let tag_score = tag_similarity.unwrap_or(0.0) * tag_weight;
235        let content_score = content_similarity.unwrap_or(0.0) * content_weight;
236        tag_score + content_score
237    }
238}