codex_memory/
storage.rs

1use crate::error::Result;
2use crate::models::{
3    Memory, SearchParams, SearchResult, SearchResultWithMetadata, SearchStrategy, StorageStats,
4};
5use sqlx::{PgPool, Row};
6use uuid::Uuid;
7
8/// Simple storage repository for text data
9pub struct Storage {
10    pool: PgPool,
11}
12
13impl Storage {
14    /// Create a new storage instance
15    pub fn new(pool: PgPool) -> Self {
16        Self { pool }
17    }
18
19    /// Store text with context and summary (deduplication by hash)
20    pub async fn store(
21        &self,
22        content: &str,
23        context: String,
24        summary: String,
25        tags: Option<Vec<String>>,
26    ) -> Result<Uuid> {
27        let memory = Memory::new(content.to_string(), context, summary, tags);
28
29        // Simple content deduplication based on content hash
30        let result: Uuid = sqlx::query_scalar(
31            r#"
32            INSERT INTO memories (id, content, content_hash, tags, context, summary, chunk_index, total_chunks, parent_id, created_at, updated_at)
33            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
34            ON CONFLICT (content_hash) DO UPDATE SET
35                context = EXCLUDED.context,
36                summary = EXCLUDED.summary,
37                tags = EXCLUDED.tags,
38                updated_at = EXCLUDED.updated_at
39            RETURNING id
40            "#
41        )
42        .bind(memory.id)
43        .bind(memory.content)
44        .bind(memory.content_hash)
45        .bind(&memory.tags)
46        .bind(&memory.context)
47        .bind(&memory.summary)
48        .bind(memory.chunk_index)
49        .bind(memory.total_chunks)
50        .bind(memory.parent_id)
51        .bind(memory.created_at)
52        .bind(memory.updated_at)
53        .fetch_one(&self.pool)
54        .await?;
55
56        Ok(result)
57    }
58
59    /// Store a chunk with parent reference
60    pub async fn store_chunk(
61        &self,
62        content: &str,
63        context: String,
64        summary: String,
65        tags: Option<Vec<String>>,
66        chunk_index: i32,
67        total_chunks: i32,
68        parent_id: Uuid,
69    ) -> Result<Uuid> {
70        let memory = Memory::new_chunk(
71            content.to_string(),
72            context,
73            summary,
74            tags,
75            chunk_index,
76            total_chunks,
77            parent_id,
78        );
79
80        // Insert chunk (no deduplication for chunks to preserve order)
81        let result: Uuid = sqlx::query_scalar(
82            r#"
83            INSERT INTO memories (id, content, content_hash, tags, context, summary, chunk_index, total_chunks, parent_id, created_at, updated_at)
84            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
85            RETURNING id
86            "#
87        )
88        .bind(memory.id)
89        .bind(memory.content)
90        .bind(memory.content_hash)
91        .bind(&memory.tags)
92        .bind(&memory.context)
93        .bind(&memory.summary)
94        .bind(memory.chunk_index)
95        .bind(memory.total_chunks)
96        .bind(memory.parent_id)
97        .bind(memory.created_at)
98        .bind(memory.updated_at)
99        .fetch_one(&self.pool)
100        .await?;
101
102        Ok(result)
103    }
104
105    /// Get memory by ID
106    pub async fn get(&self, id: Uuid) -> Result<Option<Memory>> {
107        let row = sqlx::query(
108            r#"
109            SELECT 
110                id,
111                content,
112                content_hash,
113                tags,
114                context,
115                summary,
116                chunk_index,
117                total_chunks,
118                parent_id,
119                created_at,
120                updated_at
121            FROM memories
122            WHERE id = $1
123            "#,
124        )
125        .bind(id)
126        .fetch_optional(&self.pool)
127        .await?;
128
129        match row {
130            Some(row) => {
131                let memory = Memory {
132                    id: row.get("id"),
133                    content: row.get("content"),
134                    content_hash: row.get("content_hash"),
135                    tags: row.get("tags"),
136                    context: row.get("context"),
137                    summary: row.get("summary"),
138                    chunk_index: row.get("chunk_index"),
139                    total_chunks: row.get("total_chunks"),
140                    parent_id: row.get("parent_id"),
141                    created_at: row.get("created_at"),
142                    updated_at: row.get("updated_at"),
143                };
144                Ok(Some(memory))
145            }
146            None => Ok(None),
147        }
148    }
149
150    /// Get all chunks for a parent document, ordered by chunk index
151    pub async fn get_chunks(&self, parent_id: Uuid) -> Result<Vec<Memory>> {
152        let rows = sqlx::query(
153            r#"
154            SELECT 
155                id,
156                content,
157                content_hash,
158                tags,
159                context,
160                summary,
161                chunk_index,
162                total_chunks,
163                parent_id,
164                created_at,
165                updated_at
166            FROM memories
167            WHERE parent_id = $1
168            ORDER BY chunk_index ASC
169            "#,
170        )
171        .bind(parent_id)
172        .fetch_all(&self.pool)
173        .await?;
174
175        let memories = rows
176            .into_iter()
177            .map(|row| Memory {
178                id: row.get("id"),
179                content: row.get("content"),
180                content_hash: row.get("content_hash"),
181                tags: row.get("tags"),
182                context: row.get("context"),
183                summary: row.get("summary"),
184                chunk_index: row.get("chunk_index"),
185                total_chunks: row.get("total_chunks"),
186                parent_id: row.get("parent_id"),
187                created_at: row.get("created_at"),
188                updated_at: row.get("updated_at"),
189            })
190            .collect();
191
192        Ok(memories)
193    }
194
195    /// Delete memory by ID
196    pub async fn delete(&self, id: Uuid) -> Result<bool> {
197        let result = sqlx::query("DELETE FROM memories WHERE id = $1")
198            .bind(id)
199            .execute(&self.pool)
200            .await?;
201
202        Ok(result.rows_affected() > 0)
203    }
204
205    /// Get basic storage statistics
206    pub async fn stats(&self) -> Result<StorageStats> {
207        let row = sqlx::query(
208            r#"
209            SELECT 
210                COUNT(*) as total_memories,
211                pg_size_pretty(pg_total_relation_size('memories')) as table_size,
212                MAX(created_at) as last_memory_created
213            FROM memories
214            "#,
215        )
216        .fetch_one(&self.pool)
217        .await?;
218
219        let stats = StorageStats {
220            total_memories: row.get("total_memories"),
221            table_size: row.get("table_size"),
222            last_memory_created: row.get("last_memory_created"),
223        };
224
225        Ok(stats)
226    }
227
228    /// List recent memories (for basic browsing)
229    pub async fn list_recent(&self, limit: i64) -> Result<Vec<Memory>> {
230        let rows = sqlx::query(
231            r#"
232            SELECT 
233                id,
234                content,
235                content_hash,
236                tags,
237                context,
238                summary,
239                chunk_index,
240                total_chunks,
241                parent_id,
242                created_at,
243                updated_at
244            FROM memories
245            ORDER BY created_at DESC
246            LIMIT $1
247            "#,
248        )
249        .bind(limit)
250        .fetch_all(&self.pool)
251        .await?;
252
253        let memories = rows
254            .into_iter()
255            .map(|row| Memory {
256                id: row.get("id"),
257                content: row.get("content"),
258                content_hash: row.get("content_hash"),
259                tags: row.get("tags"),
260                context: row.get("context"),
261                summary: row.get("summary"),
262                chunk_index: row.get("chunk_index"),
263                total_chunks: row.get("total_chunks"),
264                parent_id: row.get("parent_id"),
265                created_at: row.get("created_at"),
266                updated_at: row.get("updated_at"),
267            })
268            .collect();
269
270        Ok(memories)
271    }
272
273    /// Find memories with similar content but different contexts
274    /// Implements transfer appropriate processing - matching content with varying contexts
275    pub async fn find_similar_content(
276        &self,
277        content_hash: &str,
278        limit: i64,
279    ) -> Result<Vec<Memory>> {
280        let rows = sqlx::query(
281            r#"
282            SELECT 
283                id,
284                content,
285                content_hash,
286                tags,
287                context,
288                summary,
289                chunk_index,
290                total_chunks,
291                parent_id,
292                created_at,
293                updated_at
294            FROM memories
295            WHERE content_hash = $1
296            ORDER BY created_at DESC
297            LIMIT $2
298            "#,
299        )
300        .bind(content_hash)
301        .bind(limit)
302        .fetch_all(&self.pool)
303        .await?;
304
305        let memories = rows
306            .into_iter()
307            .map(|row| Memory {
308                id: row.get("id"),
309                content: row.get("content"),
310                content_hash: row.get("content_hash"),
311                tags: row.get("tags"),
312                context: row.get("context"),
313                summary: row.get("summary"),
314                chunk_index: row.get("chunk_index"),
315                total_chunks: row.get("total_chunks"),
316                parent_id: row.get("parent_id"),
317                created_at: row.get("created_at"),
318                updated_at: row.get("updated_at"),
319            })
320            .collect();
321
322        Ok(memories)
323    }
324
325    /// Check if a specific content already exists
326    /// Simplified deduplication based on content hash only
327    pub async fn exists_with_content(&self, content_hash: &str) -> Result<bool> {
328        let count: i64 =
329            sqlx::query_scalar("SELECT COUNT(*) FROM memories WHERE content_hash = $1")
330                .bind(content_hash)
331                .fetch_one(&self.pool)
332                .await?;
333
334        Ok(count > 0)
335    }
336
337    /// Get content statistics showing duplicate content
338    /// Useful for understanding deduplication effectiveness
339    pub async fn get_content_stats(&self) -> Result<Vec<(String, i64)>> {
340        let rows = sqlx::query(
341            r#"
342            SELECT 
343                content_hash,
344                COUNT(*) as total_count
345            FROM memories 
346            GROUP BY content_hash
347            HAVING COUNT(*) > 1
348            ORDER BY total_count DESC
349            LIMIT 50
350            "#,
351        )
352        .fetch_all(&self.pool)
353        .await?;
354
355        let stats = rows
356            .into_iter()
357            .map(|row| {
358                (
359                    row.get::<String, _>("content_hash"),
360                    row.get::<i64, _>("total_count"),
361                )
362            })
363            .collect();
364
365        Ok(stats)
366    }
367
368    /// Semantic similarity search using existing embeddings from codex-dreams
369    /// Implements progressive search strategy with automatic threshold relaxation
370    pub async fn search_memories(&self, params: SearchParams) -> Result<Vec<SearchResult>> {
371        // Use progressive search strategy for better results
372        self.search_memories_progressive(params).await
373    }
374
375    /// Progressive search strategy that automatically retries with relaxed criteria
376    /// Stage 1: Search with original parameters
377    /// Stage 2: If no results, lower threshold by 0.25
378    /// Stage 3: If still no results, do content-only similarity search
379    async fn search_memories_progressive(&self, params: SearchParams) -> Result<Vec<SearchResult>> {
380        let result_with_metadata = self
381            .search_memories_progressive_with_metadata(params)
382            .await?;
383        Ok(result_with_metadata.results)
384    }
385
386    /// Progressive search with metadata about which stage was used
387    pub async fn search_memories_progressive_with_metadata(
388        &self,
389        params: SearchParams,
390    ) -> Result<SearchResultWithMetadata> {
391        use crate::models::SearchMetadata;
392
393        // Stage 1: Try with original parameters
394        let stage1_results = self.search_memories_internal(params.clone()).await?;
395        if !stage1_results.is_empty() {
396            let metadata = SearchMetadata {
397                stage_used: 1,
398                stage_description: "Original parameters".to_string(),
399                threshold_used: params.similarity_threshold,
400                total_results: stage1_results.len(),
401            };
402            return Ok(SearchResultWithMetadata {
403                results: stage1_results,
404                metadata,
405            });
406        }
407
408        // Stage 2: Lower threshold by 0.25 (minimum 0.1)
409        let mut relaxed_params = params.clone();
410        relaxed_params.similarity_threshold = (params.similarity_threshold - 0.25).max(0.1);
411
412        let stage2_results = self
413            .search_memories_internal(relaxed_params.clone())
414            .await?;
415        if !stage2_results.is_empty() {
416            let metadata = SearchMetadata {
417                stage_used: 2,
418                stage_description: "Relaxed threshold".to_string(),
419                threshold_used: relaxed_params.similarity_threshold,
420                total_results: stage2_results.len(),
421            };
422            return Ok(SearchResultWithMetadata {
423                results: stage2_results,
424                metadata,
425            });
426        }
427
428        // Stage 3: Content-only search with very low threshold
429        let mut content_params = params.clone();
430        content_params.similarity_threshold = 0.1;
431        content_params.use_tag_embedding = false;
432        content_params.search_strategy = SearchStrategy::ContentFirst;
433
434        let stage3_results = self.search_memories_internal(content_params).await?;
435        let metadata = SearchMetadata {
436            stage_used: 3,
437            stage_description: "Content-only similarity".to_string(),
438            threshold_used: 0.1,
439            total_results: stage3_results.len(),
440        };
441
442        Ok(SearchResultWithMetadata {
443            results: stage3_results,
444            metadata,
445        })
446    }
447
448    /// Internal search implementation (original search_memories logic)
449    async fn search_memories_internal(&self, params: SearchParams) -> Result<Vec<SearchResult>> {
450        // Check if embedding columns exist (graceful degradation for test environments)
451        let has_embeddings = self.check_embedding_columns_exist().await?;
452
453        if !has_embeddings || (!params.use_tag_embedding && !params.use_content_embedding) {
454            // Use fallback text search when embeddings unavailable or disabled
455            return self.search_memories_fallback(params).await;
456        }
457
458        // Step 1: Generate query embedding by finding a similar memory first
459        // This is a simplified approach - in production, you'd use the same embedding service as codex-dreams
460        let query_memory_ids = if params.use_tag_embedding || params.use_content_embedding {
461            // Find memories with similar text content for embedding reference
462            let similar_text_rows = sqlx::query(
463                r#"
464                SELECT id, summary, content
465                FROM memories 
466                WHERE to_tsvector('english', summary || ' ' || content) @@ plainto_tsquery('english', $1)
467                AND embedding_vector IS NOT NULL
468                LIMIT 5
469                "#
470            )
471            .bind(&params.query)
472            .fetch_all(&self.pool)
473            .await;
474
475            match similar_text_rows {
476                Ok(rows) => {
477                    if rows.is_empty() {
478                        // Fallback to basic text search if no embedding matches
479                        return self.search_memories_fallback(params).await;
480                    }
481                    rows.into_iter()
482                        .map(|row| row.get::<Uuid, _>("id"))
483                        .collect::<Vec<_>>()
484                }
485                Err(_) => {
486                    // Embedding columns don't exist, use fallback
487                    return self.search_memories_fallback(params).await;
488                }
489            }
490        } else {
491            vec![]
492        };
493
494        // Step 2: Main search based on strategy
495        let mut results = match params.search_strategy {
496            SearchStrategy::TagsFirst => self.search_tags_first(&params, &query_memory_ids).await?,
497            SearchStrategy::ContentFirst => {
498                self.search_content_first(&params, &query_memory_ids)
499                    .await?
500            }
501            SearchStrategy::Hybrid => self.search_hybrid(&params, &query_memory_ids).await?,
502        };
503
504        // Step 3: Apply recency boost if requested
505        if params.boost_recent {
506            self.apply_recency_boost(&mut results);
507        }
508
509        // Step 4: Sort by combined score and limit results
510        results.sort_by(|a, b| b.combined_score.partial_cmp(&a.combined_score).unwrap());
511        results.truncate(params.max_results);
512
513        Ok(results)
514    }
515
516    /// Check if embedding columns exist in the database
517    async fn check_embedding_columns_exist(&self) -> Result<bool> {
518        let result = sqlx::query(
519            r#"
520            SELECT COUNT(*) as count
521            FROM information_schema.columns 
522            WHERE table_name = 'memories' 
523            AND column_name IN ('embedding_vector', 'tag_embedding')
524            "#,
525        )
526        .fetch_one(&self.pool)
527        .await;
528
529        match result {
530            Ok(row) => {
531                let count: i64 = row.get("count");
532                Ok(count >= 2) // Both embedding columns should exist
533            }
534            Err(_) => Ok(false),
535        }
536    }
537
538    /// Search using tag embeddings first, then content embeddings within results
539    async fn search_tags_first(
540        &self,
541        params: &SearchParams,
542        query_ids: &[Uuid],
543    ) -> Result<Vec<SearchResult>> {
544        if query_ids.is_empty() {
545            return Ok(vec![]);
546        }
547
548        // Use the first similar memory's tag embedding as reference
549        let tag_results = sqlx::query(
550            r#"
551            WITH query_embedding AS (
552                SELECT tag_embedding as query_vector
553                FROM memories 
554                WHERE id = $1 AND tag_embedding IS NOT NULL
555                LIMIT 1
556            )
557            SELECT m.*, 
558                   (m.tag_embedding <=> q.query_vector) as tag_similarity,
559                   m.semantic_cluster
560            FROM memories m, query_embedding q
561            WHERE m.tag_embedding IS NOT NULL
562            AND ($2::text[] IS NULL OR m.tags && $2::text[])
563            AND (m.tag_embedding <=> q.query_vector) <= $3
564            ORDER BY m.tag_embedding <=> q.query_vector
565            LIMIT $4
566            "#,
567        )
568        .bind(query_ids[0])
569        .bind(&params.tag_filter)
570        .bind(1.0 - params.similarity_threshold) // Convert similarity to distance
571        .bind((params.max_results * 3) as i64) // Get more candidates for content filtering
572        .fetch_all(&self.pool)
573        .await?;
574
575        self.enhance_with_content_similarity(tag_results, query_ids, params)
576            .await
577    }
578
579    /// Search using content embeddings first
580    async fn search_content_first(
581        &self,
582        params: &SearchParams,
583        query_ids: &[Uuid],
584    ) -> Result<Vec<SearchResult>> {
585        if query_ids.is_empty() {
586            return Ok(vec![]);
587        }
588
589        let content_results = sqlx::query(
590            r#"
591            WITH query_embedding AS (
592                SELECT embedding_vector as query_vector
593                FROM memories 
594                WHERE id = $1 AND embedding_vector IS NOT NULL
595                LIMIT 1
596            )
597            SELECT m.*, 
598                   (m.embedding_vector <=> q.query_vector) as content_similarity,
599                   m.semantic_cluster
600            FROM memories m, query_embedding q
601            WHERE m.embedding_vector IS NOT NULL
602            AND ($2::text[] IS NULL OR m.tags && $2::text[])
603            AND (m.embedding_vector <=> q.query_vector) <= $3
604            ORDER BY m.embedding_vector <=> q.query_vector
605            LIMIT $4
606            "#,
607        )
608        .bind(query_ids[0])
609        .bind(&params.tag_filter)
610        .bind(1.0 - params.similarity_threshold)
611        .bind((params.max_results * 2) as i64)
612        .fetch_all(&self.pool)
613        .await?;
614
615        self.enhance_with_tag_similarity(content_results, query_ids, params)
616            .await
617    }
618
619    /// Hybrid search combining both approaches
620    async fn search_hybrid(
621        &self,
622        params: &SearchParams,
623        query_ids: &[Uuid],
624    ) -> Result<Vec<SearchResult>> {
625        if query_ids.is_empty() {
626            return Ok(vec![]);
627        }
628
629        let results = sqlx::query(
630            r#"
631            WITH query_embeddings AS (
632                SELECT 
633                    embedding_vector as content_query_vector,
634                    tag_embedding as tag_query_vector
635                FROM memories 
636                WHERE id = $1 
637                AND embedding_vector IS NOT NULL 
638                AND tag_embedding IS NOT NULL
639                LIMIT 1
640            )
641            SELECT m.*, 
642                   (m.embedding_vector <=> q.content_query_vector) as content_similarity,
643                   (m.tag_embedding <=> q.tag_query_vector) as tag_similarity,
644                   m.semantic_cluster
645            FROM memories m, query_embeddings q
646            WHERE m.embedding_vector IS NOT NULL 
647            AND m.tag_embedding IS NOT NULL
648            AND ($2::text[] IS NULL OR m.tags && $2::text[])
649            AND (
650                (m.embedding_vector <=> q.content_query_vector) <= $3 OR
651                (m.tag_embedding <=> q.tag_query_vector) <= $3
652            )
653            ORDER BY LEAST(
654                m.embedding_vector <=> q.content_query_vector,
655                m.tag_embedding <=> q.tag_query_vector
656            )
657            LIMIT $4
658            "#,
659        )
660        .bind(query_ids[0])
661        .bind(&params.tag_filter)
662        .bind(1.0 - params.similarity_threshold)
663        .bind((params.max_results * 2) as i64)
664        .fetch_all(&self.pool)
665        .await?;
666
667        Ok(self.rows_to_search_results(results, params))
668    }
669
670    /// Enhance tag results with content similarity scores
671    async fn enhance_with_content_similarity(
672        &self,
673        tag_results: Vec<sqlx::postgres::PgRow>,
674        query_ids: &[Uuid],
675        params: &SearchParams,
676    ) -> Result<Vec<SearchResult>> {
677        if query_ids.is_empty() || tag_results.is_empty() {
678            return Ok(vec![]);
679        }
680
681        // Get content similarities for the tag results
682        let memory_ids: Vec<Uuid> = tag_results.iter().map(|r| r.get("id")).collect();
683
684        let content_similarities = if params.use_content_embedding {
685            sqlx::query(
686                r#"
687                WITH query_embedding AS (
688                    SELECT embedding_vector as query_vector
689                    FROM memories 
690                    WHERE id = $1 AND embedding_vector IS NOT NULL
691                    LIMIT 1
692                )
693                SELECT m.id, (m.embedding_vector <=> q.query_vector) as content_similarity
694                FROM memories m, query_embedding q
695                WHERE m.id = ANY($2) AND m.embedding_vector IS NOT NULL
696                "#,
697            )
698            .bind(query_ids[0])
699            .bind(&memory_ids)
700            .fetch_all(&self.pool)
701            .await?
702        } else {
703            vec![]
704        };
705
706        // Create lookup map for content similarities
707        let content_sim_map: std::collections::HashMap<Uuid, f64> = content_similarities
708            .into_iter()
709            .map(|row| (row.get("id"), 1.0 - row.get::<f64, _>("content_similarity")))
710            .collect();
711
712        // Combine results
713        let mut results = vec![];
714        for row in tag_results {
715            let memory_id: Uuid = row.get("id");
716            let tag_similarity = Some(1.0 - row.get::<f64, _>("tag_similarity"));
717            let content_similarity = content_sim_map.get(&memory_id).copied();
718            let semantic_cluster = row.get("semantic_cluster");
719
720            let memory = self.row_to_memory(&row);
721            let result = SearchResult::new(
722                memory,
723                tag_similarity,
724                content_similarity,
725                semantic_cluster,
726                params.tag_weight,
727                params.content_weight,
728            );
729
730            if result.combined_score >= params.similarity_threshold {
731                results.push(result);
732            }
733        }
734
735        Ok(results)
736    }
737
738    /// Enhance content results with tag similarity scores  
739    async fn enhance_with_tag_similarity(
740        &self,
741        content_results: Vec<sqlx::postgres::PgRow>,
742        query_ids: &[Uuid],
743        params: &SearchParams,
744    ) -> Result<Vec<SearchResult>> {
745        if query_ids.is_empty() || content_results.is_empty() {
746            return Ok(vec![]);
747        }
748
749        let memory_ids: Vec<Uuid> = content_results.iter().map(|r| r.get("id")).collect();
750
751        let tag_similarities = if params.use_tag_embedding {
752            sqlx::query(
753                r#"
754                WITH query_embedding AS (
755                    SELECT tag_embedding as query_vector
756                    FROM memories 
757                    WHERE id = $1 AND tag_embedding IS NOT NULL
758                    LIMIT 1
759                )
760                SELECT m.id, (m.tag_embedding <=> q.query_vector) as tag_similarity
761                FROM memories m, query_embedding q
762                WHERE m.id = ANY($2) AND m.tag_embedding IS NOT NULL
763                "#,
764            )
765            .bind(query_ids[0])
766            .bind(&memory_ids)
767            .fetch_all(&self.pool)
768            .await?
769        } else {
770            vec![]
771        };
772
773        let tag_sim_map: std::collections::HashMap<Uuid, f64> = tag_similarities
774            .into_iter()
775            .map(|row| (row.get("id"), 1.0 - row.get::<f64, _>("tag_similarity")))
776            .collect();
777
778        let mut results = vec![];
779        for row in content_results {
780            let memory_id: Uuid = row.get("id");
781            let content_similarity = Some(1.0 - row.get::<f64, _>("content_similarity"));
782            let tag_similarity = tag_sim_map.get(&memory_id).copied();
783            let semantic_cluster = row.get("semantic_cluster");
784
785            let memory = self.row_to_memory(&row);
786            let result = SearchResult::new(
787                memory,
788                tag_similarity,
789                content_similarity,
790                semantic_cluster,
791                params.tag_weight,
792                params.content_weight,
793            );
794
795            if result.combined_score >= params.similarity_threshold {
796                results.push(result);
797            }
798        }
799
800        Ok(results)
801    }
802
803    /// Convert database rows to SearchResult objects
804    fn rows_to_search_results(
805        &self,
806        rows: Vec<sqlx::postgres::PgRow>,
807        params: &SearchParams,
808    ) -> Vec<SearchResult> {
809        rows.into_iter()
810            .filter_map(|row| {
811                let tag_similarity = row
812                    .try_get::<f64, _>("tag_similarity")
813                    .ok()
814                    .map(|v| 1.0 - v);
815                let content_similarity = row
816                    .try_get::<f64, _>("content_similarity")
817                    .ok()
818                    .map(|v| 1.0 - v);
819                let semantic_cluster = row.get("semantic_cluster");
820
821                let memory = self.row_to_memory(&row);
822                let result = SearchResult::new(
823                    memory,
824                    tag_similarity,
825                    content_similarity,
826                    semantic_cluster,
827                    params.tag_weight,
828                    params.content_weight,
829                );
830
831                if result.combined_score >= params.similarity_threshold {
832                    Some(result)
833                } else {
834                    None
835                }
836            })
837            .collect()
838    }
839
840    /// Convert database row to Memory object
841    fn row_to_memory(&self, row: &sqlx::postgres::PgRow) -> Memory {
842        Memory {
843            id: row.get("id"),
844            content: row.get("content"),
845            content_hash: row.get("content_hash"),
846            tags: row.get("tags"),
847            context: row.get("context"),
848            summary: row.get("summary"),
849            chunk_index: row.get("chunk_index"),
850            total_chunks: row.get("total_chunks"),
851            parent_id: row.get("parent_id"),
852            created_at: row.get("created_at"),
853            updated_at: row.get("updated_at"),
854        }
855    }
856
857    /// Apply recency boost to search results
858    fn apply_recency_boost(&self, results: &mut [SearchResult]) {
859        let now = chrono::Utc::now();
860        for result in results.iter_mut() {
861            let age_days = (now - result.memory.created_at).num_days() as f64;
862            let recency_factor = (1.0 / (1.0 + age_days / 30.0)).max(0.1); // Boost recent memories
863            result.combined_score *= recency_factor;
864        }
865    }
866
867    /// Fallback search using simple pattern matching when embeddings unavailable
868    async fn search_memories_fallback(&self, params: SearchParams) -> Result<Vec<SearchResult>> {
869        // Use ILIKE for case-insensitive pattern matching as fallback
870        let search_pattern = format!("%{}%", params.query);
871
872        let query_sql = if let Some(ref _tag_filter) = params.tag_filter {
873            r#"
874            SELECT *, 
875                   CAST(CASE 
876                     WHEN content ILIKE $1 AND summary ILIKE $1 THEN 1.0
877                     WHEN content ILIKE $1 OR summary ILIKE $1 THEN 0.8
878                     WHEN context ILIKE $1 THEN 0.6
879                     WHEN EXISTS (SELECT 1 FROM unnest(tags) AS tag WHERE tag ILIKE $1) THEN 0.5
880                     ELSE 0.4
881                   END AS FLOAT8) as rank
882            FROM memories 
883            WHERE (content ILIKE $1 OR summary ILIKE $1 OR context ILIKE $1 
884                   OR EXISTS (SELECT 1 FROM unnest(tags) AS tag WHERE tag ILIKE $1))
885            AND tags && $2::text[]
886            ORDER BY rank DESC, created_at DESC
887            LIMIT $3
888            "#
889        } else {
890            r#"
891            SELECT *, 
892                   CAST(CASE 
893                     WHEN content ILIKE $1 AND summary ILIKE $1 THEN 1.0
894                     WHEN content ILIKE $1 OR summary ILIKE $1 THEN 0.8
895                     WHEN context ILIKE $1 THEN 0.6
896                     WHEN EXISTS (SELECT 1 FROM unnest(tags) AS tag WHERE tag ILIKE $1) THEN 0.5
897                     ELSE 0.4
898                   END AS FLOAT8) as rank
899            FROM memories 
900            WHERE content ILIKE $1 OR summary ILIKE $1 OR context ILIKE $1 
901                  OR EXISTS (SELECT 1 FROM unnest(tags) AS tag WHERE tag ILIKE $1)
902            ORDER BY rank DESC, created_at DESC
903            LIMIT $2
904            "#
905        };
906
907        let rows = if let Some(ref tag_filter) = params.tag_filter {
908            sqlx::query(query_sql)
909                .bind(&search_pattern)
910                .bind(tag_filter)
911                .bind(params.max_results as i64)
912                .fetch_all(&self.pool)
913                .await?
914        } else {
915            sqlx::query(query_sql)
916                .bind(&search_pattern)
917                .bind(params.max_results as i64)
918                .fetch_all(&self.pool)
919                .await?
920        };
921
922        let results = rows
923            .into_iter()
924            .map(|row| {
925                let rank: f64 = row.get("rank");
926                let memory = self.row_to_memory(&row);
927                // For fallback search, use rank directly as it's already a final score
928                SearchResult {
929                    memory,
930                    tag_similarity: None,
931                    content_similarity: Some(rank),
932                    combined_score: rank, // Use rank directly, not weighted
933                    semantic_cluster: None,
934                }
935            })
936            .filter(|result| result.combined_score >= params.similarity_threshold)
937            .collect();
938
939        Ok(results)
940    }
941}
codex_memory/storage.rs

codex_memory/
storage.rs