Skip to main content

engram/intelligence/
proactive.rs

//! Proactive Memory Acquisition — RML-1221
//!
//! Identifies knowledge gaps and suggests what the user should remember.
//!
//! ## Components
//! - [`GapDetector`] — analyses coverage, detects gaps, suggests acquisitions
//! - [`InterestTracker`] — records queries and surfaces frequent topics
//!
//! ## Invariants
//! - Confidence scores are always in the range [0.0, 1.0]
//! - Priority is 1 (highest) .. 3 (lowest)
//! - All timestamps are RFC3339 UTC
//! - No unwrap() in production paths
14
15use chrono::Utc;
16use rusqlite::{params, Connection};
17use serde::{Deserialize, Serialize};
18
19use crate::error::Result;
20
21// =============================================================================
22// Types
23// =============================================================================
24
25/// Summary of how well a workspace is covered by memories.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct CoverageReport {
28    /// Total number of memories in the workspace
29    pub total_memories: i64,
30    /// Number of memories per tag (topic)
31    pub topic_distribution: Vec<(String, i64)>,
32    /// Date ranges with no memories (gap > 7 days)
33    pub temporal_gaps: Vec<TemporalGap>,
34    /// Topics that are under-represented or low quality
35    pub weak_areas: Vec<WeakArea>,
36}
37
38/// A period of time where no memories were created.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct TemporalGap {
41    /// RFC3339 UTC timestamp — end of the previous memory
42    pub from: String,
43    /// RFC3339 UTC timestamp — start of the next memory
44    pub to: String,
45    /// Length of the gap in fractional days
46    pub gap_days: f64,
47}
48
49/// A topic that needs more coverage.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct WeakArea {
52    /// The tag / topic label
53    pub topic: String,
54    /// How many memories carry this tag
55    pub memory_count: i64,
56    /// Average importance of memories on this topic
57    pub avg_importance: f32,
58    /// Human-readable suggestion for improvement
59    pub suggestion: String,
60}
61
62/// A gap in the user's knowledge base.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct KnowledgeGap {
65    /// Topic or category that is missing
66    pub topic: String,
67    /// How confident we are that this is a real gap (0.0 – 1.0)
68    pub confidence: f32,
69    /// Actionable suggestion for closing the gap
70    pub suggestion: String,
71    /// IDs of existing memories related to this gap
72    pub related_memory_ids: Vec<i64>,
73}
74
75/// A concrete recommendation for a new memory to create.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct AcquisitionSuggestion {
78    /// Hint for what the memory content should cover
79    pub content_hint: String,
80    /// Recommended memory type (e.g., "note", "decision", "todo")
81    pub suggested_type: String,
82    /// Priority: 1 = highest, 2 = medium, 3 = lowest
83    pub priority: u8,
84    /// Why this memory is worth creating
85    pub reason: String,
86}
87
88// =============================================================================
89// DDL
90// =============================================================================
91
/// SQL that creates the `query_log` table and its two indexes.
///
/// Every statement uses `IF NOT EXISTS`; run the batch once as part of schema setup,
/// before [`InterestTracker`] is used.
pub const CREATE_QUERY_LOG_TABLE: &str = r"
    CREATE TABLE IF NOT EXISTS query_log (
        id        INTEGER PRIMARY KEY AUTOINCREMENT,
        query     TEXT NOT NULL,
        workspace TEXT NOT NULL DEFAULT 'default',
        timestamp TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
    );
    CREATE INDEX IF NOT EXISTS idx_query_log_workspace ON query_log(workspace);
    CREATE INDEX IF NOT EXISTS idx_query_log_timestamp ON query_log(timestamp);
";
103
104// =============================================================================
105// GapDetector
106// =============================================================================
107
/// Analyses a workspace and surfaces knowledge gaps.
///
/// The detector holds no state: every method takes the database connection and the
/// workspace name as arguments, so a single value can be copied and reused freely.
#[derive(Debug, Clone, Copy)]
pub struct GapDetector;
110
111impl GapDetector {
112    pub fn new() -> Self {
113        Self
114    }
115
116    /// Analyse memory coverage for `workspace`.
117    ///
118    /// Returns a [`CoverageReport`] with tag distribution, temporal gaps,
119    /// and weak areas.
120    pub fn analyze_coverage(&self, conn: &Connection, workspace: &str) -> Result<CoverageReport> {
121        // 1. Total memory count
122        let total_memories: i64 = conn.query_row(
123            "SELECT COUNT(*) FROM memories WHERE workspace = ?1",
124            params![workspace],
125            |row| row.get(0),
126        )?;
127
128        // 2. Topic distribution — count memories per tag
129        let topic_distribution = self.count_memories_per_tag(conn, workspace)?;
130
131        // 3. Temporal gaps — fetch sorted timestamps, look for gaps > 7 days
132        let temporal_gaps = self.find_temporal_gaps(conn, workspace)?;
133
134        // 4. Weak areas — tags with < 3 memories or avg importance < 0.3
135        let weak_areas = self.find_weak_areas(conn, workspace)?;
136
137        Ok(CoverageReport {
138            total_memories,
139            topic_distribution,
140            temporal_gaps,
141            weak_areas,
142        })
143    }
144
145    /// Detect knowledge gaps for `workspace`.
146    ///
147    /// Three gap types are checked:
148    /// - Sparse topics (< 3 memories per tag)
149    /// - Temporal gaps (no memories for > 7 days)
150    /// - Unresolved questions (memories containing `?`)
151    pub fn detect_gaps(&self, conn: &Connection, workspace: &str) -> Result<Vec<KnowledgeGap>> {
152        let mut gaps: Vec<KnowledgeGap> = Vec::new();
153
154        // -- Sparse topics --
155        let tag_counts = self.count_memories_per_tag(conn, workspace)?;
156        for (tag, count) in &tag_counts {
157            if *count < 3 {
158                // Fetch IDs of memories with this tag so callers can inspect them
159                let related_ids = self.memory_ids_for_tag(conn, workspace, tag)?;
160                gaps.push(KnowledgeGap {
161                    topic: tag.clone(),
162                    confidence: 0.7,
163                    suggestion: format!(
164                        "Only {} memory/memories tagged '{}'. Consider adding more detail.",
165                        count, tag
166                    ),
167                    related_memory_ids: related_ids,
168                });
169            }
170        }
171
172        // -- Temporal gaps --
173        let temporal_gaps = self.find_temporal_gaps(conn, workspace)?;
174        for gap in &temporal_gaps {
175            gaps.push(KnowledgeGap {
176                topic: format!("temporal gap ({:.1} days)", gap.gap_days),
177                confidence: 0.5,
178                suggestion: format!(
179                    "No memories were created for {:.1} days between {} and {}. \
180                     Consider adding a summary of what happened during this period.",
181                    gap.gap_days, gap.from, gap.to
182                ),
183                related_memory_ids: vec![],
184            });
185        }
186
187        // -- Unresolved questions --
188        let question_ids = self.find_question_memory_ids(conn, workspace)?;
189        if !question_ids.is_empty() {
190            gaps.push(KnowledgeGap {
191                topic: "unresolved questions".to_string(),
192                confidence: 0.9,
193                suggestion: format!(
194                    "{} memory/memories contain unresolved questions. \
195                     Recording answers will improve your knowledge base.",
196                    question_ids.len()
197                ),
198                related_memory_ids: question_ids,
199            });
200        }
201
202        Ok(gaps)
203    }
204
205    /// Suggest specific new memories that would close the most important gaps.
206    ///
207    /// Priority order: unresolved questions (1) > sparse topics (2) > temporal gaps (3).
208    /// At most `limit` suggestions are returned (0 = unlimited).
209    pub fn suggest_acquisitions(
210        &self,
211        conn: &Connection,
212        workspace: &str,
213        limit: usize,
214    ) -> Result<Vec<AcquisitionSuggestion>> {
215        let mut suggestions: Vec<AcquisitionSuggestion> = Vec::new();
216
217        // Priority 1 — unresolved questions
218        let question_ids = self.find_question_memory_ids(conn, workspace)?;
219        if !question_ids.is_empty() {
220            let count = question_ids.len();
221            suggestions.push(AcquisitionSuggestion {
222                content_hint: format!(
223                    "Answer the {} outstanding question(s) stored in memories {:?}",
224                    count, question_ids
225                ),
226                suggested_type: "note".to_string(),
227                priority: 1,
228                reason: format!(
229                    "{} memories contain unanswered questions; capturing answers closes these gaps.",
230                    count
231                ),
232            });
233        }
234
235        // Priority 2 — sparse topics
236        let tag_counts = self.count_memories_per_tag(conn, workspace)?;
237        for (tag, count) in &tag_counts {
238            if *count < 3 {
239                suggestions.push(AcquisitionSuggestion {
240                    content_hint: format!(
241                        "Add more information about '{}' (currently only {} memory/memories).",
242                        tag, count
243                    ),
244                    suggested_type: "note".to_string(),
245                    priority: 2,
246                    reason: format!(
247                        "The topic '{}' is under-represented with only {} entry/entries.",
248                        tag, count
249                    ),
250                });
251            }
252        }
253
254        // Priority 3 — temporal gaps
255        let temporal_gaps = self.find_temporal_gaps(conn, workspace)?;
256        for gap in &temporal_gaps {
257            suggestions.push(AcquisitionSuggestion {
258                content_hint: format!(
259                    "Write a summary of events that occurred between {} and {} ({:.1} days).",
260                    gap.from, gap.to, gap.gap_days
261                ),
262                suggested_type: "note".to_string(),
263                priority: 3,
264                reason: format!(
265                    "There is a {:.1}-day gap in your memory timeline with no recorded events.",
266                    gap.gap_days
267                ),
268            });
269        }
270
271        // Sort stable by priority (ascending = highest first)
272        suggestions.sort_by_key(|s| s.priority);
273
274        if limit > 0 {
275            suggestions.truncate(limit);
276        }
277
278        Ok(suggestions)
279    }
280
281    // -------------------------------------------------------------------------
282    // Private helpers
283    // -------------------------------------------------------------------------
284
285    /// Count memories per tag for the given workspace.
286    fn count_memories_per_tag(
287        &self,
288        conn: &Connection,
289        workspace: &str,
290    ) -> Result<Vec<(String, i64)>> {
291        // Tags are stored in a separate `tags` table linked to memories by memory_id.
292        // We join on memories.workspace so we only count within the requested workspace.
293        let mut stmt = conn.prepare(
294            "SELECT t.tag, COUNT(DISTINCT t.memory_id) as cnt
295             FROM tags t
296             JOIN memories m ON m.id = t.memory_id
297             WHERE m.workspace = ?1
298             GROUP BY t.tag
299             ORDER BY cnt DESC",
300        )?;
301        let rows = stmt.query_map(params![workspace], |row| {
302            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
303        })?;
304        let result: rusqlite::Result<Vec<(String, i64)>> = rows.collect();
305        Ok(result?)
306    }
307
308    /// Find temporal gaps > 7 days in the workspace's memory timeline.
309    fn find_temporal_gaps(&self, conn: &Connection, workspace: &str) -> Result<Vec<TemporalGap>> {
310        let mut stmt = conn.prepare(
311            "SELECT created_at FROM memories
312             WHERE workspace = ?1
313             ORDER BY created_at ASC",
314        )?;
315        let timestamps: Vec<String> = stmt
316            .query_map(params![workspace], |row| row.get(0))?
317            .collect::<rusqlite::Result<Vec<String>>>()?;
318
319        let mut gaps = Vec::new();
320        for window in timestamps.windows(2) {
321            let from_str = &window[0];
322            let to_str = &window[1];
323
324            // Parse as naive datetime (RFC3339) — fall back gracefully on error
325            if let (Ok(from_dt), Ok(to_dt)) = (
326                chrono::DateTime::parse_from_rfc3339(from_str),
327                chrono::DateTime::parse_from_rfc3339(to_str),
328            ) {
329                let gap_seconds = (to_dt - from_dt).num_seconds();
330                let gap_days = gap_seconds as f64 / 86_400.0;
331                if gap_days > 7.0 {
332                    gaps.push(TemporalGap {
333                        from: from_str.clone(),
334                        to: to_str.clone(),
335                        gap_days,
336                    });
337                }
338            }
339        }
340        Ok(gaps)
341    }
342
343    /// Find tags that are weak: fewer than 3 memories OR avg importance < 0.3.
344    fn find_weak_areas(&self, conn: &Connection, workspace: &str) -> Result<Vec<WeakArea>> {
345        let mut stmt = conn.prepare(
346            "SELECT t.tag,
347                    COUNT(DISTINCT t.memory_id)       AS cnt,
348                    AVG(COALESCE(m.importance, 0.5))  AS avg_imp
349             FROM tags t
350             JOIN memories m ON m.id = t.memory_id
351             WHERE m.workspace = ?1
352             GROUP BY t.tag",
353        )?;
354        let rows = stmt.query_map(params![workspace], |row| {
355            Ok((
356                row.get::<_, String>(0)?,
357                row.get::<_, i64>(1)?,
358                row.get::<_, f64>(2)?,
359            ))
360        })?;
361
362        let mut weak: Vec<WeakArea> = Vec::new();
363        for row in rows {
364            let (tag, count, avg_imp) = row?;
365            let avg_importance = avg_imp as f32;
366            if count < 3 || avg_importance < 0.3 {
367                let suggestion = if count < 3 {
368                    format!(
369                        "Only {} memory/memories about '{}'. Expand coverage.",
370                        count, tag
371                    )
372                } else {
373                    format!(
374                        "Memories about '{}' have low average importance ({:.2}). \
375                         Review and update their relevance.",
376                        tag, avg_importance
377                    )
378                };
379                weak.push(WeakArea {
380                    topic: tag,
381                    memory_count: count,
382                    avg_importance,
383                    suggestion,
384                });
385            }
386        }
387        Ok(weak)
388    }
389
390    /// Return memory IDs for a given tag within a workspace.
391    fn memory_ids_for_tag(
392        &self,
393        conn: &Connection,
394        workspace: &str,
395        tag: &str,
396    ) -> Result<Vec<i64>> {
397        let mut stmt = conn.prepare(
398            "SELECT t.memory_id FROM tags t
399             JOIN memories m ON m.id = t.memory_id
400             WHERE m.workspace = ?1 AND t.tag = ?2
401             ORDER BY t.memory_id ASC",
402        )?;
403        let ids: Vec<i64> = stmt
404            .query_map(params![workspace, tag], |row| row.get(0))?
405            .collect::<rusqlite::Result<Vec<i64>>>()?;
406        Ok(ids)
407    }
408
409    /// Return IDs of memories whose content contains `?`.
410    fn find_question_memory_ids(&self, conn: &Connection, workspace: &str) -> Result<Vec<i64>> {
411        let mut stmt = conn.prepare(
412            "SELECT id FROM memories
413             WHERE workspace = ?1 AND content LIKE '%?%'
414             ORDER BY id ASC",
415        )?;
416        let ids: Vec<i64> = stmt
417            .query_map(params![workspace], |row| row.get(0))?
418            .collect::<rusqlite::Result<Vec<i64>>>()?;
419        Ok(ids)
420    }
421}
422
423impl Default for GapDetector {
424    fn default() -> Self {
425        Self::new()
426    }
427}
428
429// =============================================================================
430// InterestTracker
431// =============================================================================
432
/// Tracks user queries to surface topics the user is interested in.
///
/// The tracker holds no state of its own: queries are persisted in the `query_log`
/// table through the connection passed to each method.
#[derive(Debug, Clone, Copy)]
pub struct InterestTracker;
435
436impl InterestTracker {
437    pub fn new() -> Self {
438        Self
439    }
440
441    /// Record a search query for later analysis.
442    ///
443    /// The query_log table must already exist (see [`CREATE_QUERY_LOG_TABLE`]).
444    pub fn record_query(&self, conn: &Connection, query: &str, workspace: &str) -> Result<()> {
445        let now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
446        conn.execute(
447            "INSERT INTO query_log (query, workspace, timestamp) VALUES (?1, ?2, ?3)",
448            params![query, workspace, now],
449        )?;
450        Ok(())
451    }
452
453    /// Return the most frequently queried keywords for `workspace`.
454    ///
455    /// Each query is split into lowercase words; counts are aggregated.
456    /// Returns at most `limit` results ordered by frequency descending.
457    /// `limit = 0` returns all results.
458    pub fn get_frequent_topics(
459        &self,
460        conn: &Connection,
461        workspace: &str,
462        limit: usize,
463    ) -> Result<Vec<(String, i64)>> {
464        // Fetch all queries for the workspace
465        let mut stmt = conn.prepare("SELECT query FROM query_log WHERE workspace = ?1")?;
466        let queries: Vec<String> = stmt
467            .query_map(params![workspace], |row| row.get(0))?
468            .collect::<rusqlite::Result<Vec<String>>>()?;
469
470        // Count word frequencies (stop-word filtering)
471        let stop_words: std::collections::HashSet<&str> = [
472            "the", "a", "an", "is", "are", "was", "were", "be", "been", "have", "has", "had", "do",
473            "does", "did", "will", "would", "could", "should", "this", "that", "and", "but", "or",
474            "if", "in", "on", "at", "by", "to", "of", "for", "with", "from", "as", "it", "its",
475            "not", "no",
476        ]
477        .iter()
478        .cloned()
479        .collect();
480
481        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
482
483        for query in &queries {
484            for word in query.split_whitespace() {
485                let w = word
486                    .to_lowercase()
487                    .trim_matches(|c: char| !c.is_alphanumeric())
488                    .to_string();
489                if w.len() > 2 && !stop_words.contains(w.as_str()) {
490                    *counts.entry(w).or_insert(0) += 1;
491                }
492            }
493        }
494
495        let mut sorted: Vec<(String, i64)> = counts.into_iter().collect();
496        sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
497
498        if limit > 0 {
499            sorted.truncate(limit);
500        }
501        Ok(sorted)
502    }
503}
504
505impl Default for InterestTracker {
506    fn default() -> Self {
507        Self::new()
508    }
509}
510
511// =============================================================================
512// Tests
513// =============================================================================
514
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Create an in-memory SQLite database with the minimal schema the module queries:
    /// `memories`, `tags`, and `query_log`.
    fn setup_conn() -> Connection {
        let conn = Connection::open_in_memory().unwrap();

        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS memories (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                content     TEXT    NOT NULL,
                workspace   TEXT    NOT NULL DEFAULT 'default',
                importance  REAL    NOT NULL DEFAULT 0.5,
                created_at  TEXT    NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
                updated_at  TEXT    NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
            );
            CREATE TABLE IF NOT EXISTS tags (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id   INTEGER NOT NULL,
                tag         TEXT    NOT NULL,
                FOREIGN KEY(memory_id) REFERENCES memories(id)
            );",
        )
        .unwrap();

        conn.execute_batch(CREATE_QUERY_LOG_TABLE).unwrap();

        conn
    }

    /// Insert a memory with the given importance, creation time and tags; returns its id.
    fn insert_memory(
        conn: &Connection,
        workspace: &str,
        content: &str,
        importance: f32,
        created_at: &str,
        tags: &[&str],
    ) -> i64 {
        conn.execute(
            "INSERT INTO memories (content, workspace, importance, created_at, updated_at)
             VALUES (?1, ?2, ?3, ?4, ?4)",
            params![content, workspace, importance, created_at],
        )
        .unwrap();
        let id = conn.last_insert_rowid();
        for tag in tags {
            conn.execute(
                "INSERT INTO tags (memory_id, tag) VALUES (?1, ?2)",
                params![id, tag],
            )
            .unwrap();
        }
        id
    }

    // -------------------------------------------------------------------------
    // Test 1 — coverage report with varied data
    // -------------------------------------------------------------------------
    #[test]
    fn test_coverage_report_with_varied_data() {
        let conn = setup_conn();

        insert_memory(
            &conn,
            "ws",
            "Rust basics",
            0.8,
            "2024-01-01T00:00:00Z",
            &["rust", "programming"],
        );
        insert_memory(
            &conn,
            "ws",
            "Rust lifetimes",
            0.7,
            "2024-01-02T00:00:00Z",
            &["rust"],
        );
        insert_memory(
            &conn,
            "ws",
            "Rust traits",
            0.9,
            "2024-01-03T00:00:00Z",
            &["rust"],
        );
        insert_memory(
            &conn,
            "ws",
            "Python basics",
            0.5,
            "2024-01-04T00:00:00Z",
            &["python"],
        );

        let detector = GapDetector::new();
        let report = detector.analyze_coverage(&conn, "ws").unwrap();

        assert_eq!(report.total_memories, 4);
        assert_eq!(report.topic_distribution.len(), 3);

        // "rust" is the most represented topic, so it comes first
        assert_eq!(report.topic_distribution[0], ("rust".to_string(), 3));

        // "python" and "programming" each appear once → weak areas
        for topic in ["python", "programming"] {
            let entry = report.weak_areas.iter().find(|w| w.topic == topic);
            assert!(
                entry.is_some(),
                "{} should be a weak area (only 1 memory)",
                topic
            );
            assert_eq!(entry.unwrap().memory_count, 1);
        }

        // "rust" has 3 memories of high importance → not weak
        assert!(
            !report.weak_areas.iter().any(|w| w.topic == "rust"),
            "rust should not be a weak area"
        );

        // Consecutive days → no temporal gaps
        assert!(report.temporal_gaps.is_empty());
    }

    // -------------------------------------------------------------------------
    // Test 2 — temporal gap detection
    // -------------------------------------------------------------------------
    #[test]
    fn test_temporal_gap_detection() {
        let conn = setup_conn();

        // Two memories 20 days apart — should produce exactly one gap
        insert_memory(&conn, "ws", "Note A", 0.5, "2024-03-01T00:00:00Z", &[]);
        insert_memory(&conn, "ws", "Note B", 0.5, "2024-03-21T00:00:00Z", &[]);

        let detector = GapDetector::new();
        let report = detector.analyze_coverage(&conn, "ws").unwrap();

        assert_eq!(report.temporal_gaps.len(), 1, "should detect a 20-day gap");
        let gap = &report.temporal_gaps[0];
        assert!(gap.gap_days > 19.0 && gap.gap_days < 21.0);
        assert_eq!(gap.from, "2024-03-01T00:00:00Z");
        assert_eq!(gap.to, "2024-03-21T00:00:00Z");
    }

    // -------------------------------------------------------------------------
    // Test 3 — no temporal gap when memories are within 7 days
    // -------------------------------------------------------------------------
    #[test]
    fn test_no_temporal_gap_within_7_days() {
        let conn = setup_conn();

        insert_memory(&conn, "ws", "Note A", 0.5, "2024-03-01T00:00:00Z", &[]);
        insert_memory(&conn, "ws", "Note B", 0.5, "2024-03-05T00:00:00Z", &[]);

        let detector = GapDetector::new();
        let report = detector.analyze_coverage(&conn, "ws").unwrap();

        assert!(
            report.temporal_gaps.is_empty(),
            "4-day gap should not be reported"
        );
    }

    // -------------------------------------------------------------------------
    // Test 4 — weak area detection (low importance)
    // -------------------------------------------------------------------------
    #[test]
    fn test_weak_area_low_importance() {
        let conn = setup_conn();

        // Three memories tagged "low-imp" with very low importance
        for i in 0..3 {
            insert_memory(
                &conn,
                "ws",
                &format!("Low importance note {}", i),
                0.1,
                &format!("2024-05-0{}T00:00:00Z", i + 1),
                &["low-imp"],
            );
        }
        // One high importance memory tagged "high-imp"
        insert_memory(
            &conn,
            "ws",
            "Important note",
            0.9,
            "2024-05-10T00:00:00Z",
            &["high-imp"],
        );

        let detector = GapDetector::new();
        let report = detector.analyze_coverage(&conn, "ws").unwrap();

        // low-imp has enough memories, so it must be flagged for importance, not count
        let low_imp = report.weak_areas.iter().find(|w| w.topic == "low-imp");
        assert!(low_imp.is_some(), "low-imp should be flagged as weak area");
        let low_imp = low_imp.unwrap();
        assert_eq!(low_imp.memory_count, 3);
        assert!(low_imp.avg_importance < 0.3);
        assert!(low_imp.suggestion.contains("low average importance"));

        // high-imp has only 1 memory → still weak by count; that's expected
        let high_imp = report.weak_areas.iter().find(|w| w.topic == "high-imp");
        assert!(
            high_imp.is_some(),
            "high-imp has only 1 memory, still a weak area by count"
        );
        assert!(high_imp.unwrap().suggestion.starts_with("Only 1 "));
    }

    // -------------------------------------------------------------------------
    // Test 5 — suggest acquisitions priority order
    // -------------------------------------------------------------------------
    #[test]
    fn test_suggest_acquisitions_priority_order() {
        let conn = setup_conn();

        // Unresolved question → priority 1
        insert_memory(
            &conn,
            "ws",
            "What is the best caching strategy?",
            0.5,
            "2024-06-01T00:00:00Z",
            &[],
        );

        // Sparse topic → priority 2
        insert_memory(
            &conn,
            "ws",
            "Note about caching",
            0.5,
            "2024-06-02T00:00:00Z",
            &["caching"],
        );

        // Temporal gaps → priority 3 (Jan → Mar and Mar → Jun)
        insert_memory(
            &conn,
            "ws",
            "Note before gap",
            0.5,
            "2024-01-01T00:00:00Z",
            &[],
        );
        insert_memory(
            &conn,
            "ws",
            "Note after gap",
            0.5,
            "2024-03-01T00:00:00Z",
            &[],
        );

        let detector = GapDetector::new();
        let suggestions = detector.suggest_acquisitions(&conn, "ws", 10).unwrap();

        // One question suggestion, one sparse topic, two temporal gaps — in that order
        let priorities: Vec<u8> = suggestions.iter().map(|s| s.priority).collect();
        assert_eq!(
            priorities,
            vec![1, 2, 3, 3],
            "suggestions should be sorted by priority ascending"
        );
        assert!(suggestions[1].content_hint.contains("'caching'"));
        assert!(suggestions.iter().all(|s| s.suggested_type == "note"));
    }

    // -------------------------------------------------------------------------
    // Test 6 — interest tracking
    // -------------------------------------------------------------------------
    #[test]
    fn test_interest_tracking() {
        let conn = setup_conn();
        let tracker = InterestTracker::new();

        tracker
            .record_query(&conn, "rust async programming", "ws")
            .unwrap();
        tracker
            .record_query(&conn, "rust error handling", "ws")
            .unwrap();
        tracker.record_query(&conn, "rust lifetimes", "ws").unwrap();
        tracker
            .record_query(&conn, "python web frameworks", "ws")
            .unwrap();

        let topics = tracker.get_frequent_topics(&conn, "ws", 5).unwrap();

        // Nine distinct words were recorded, so the limit of 5 is what bounds the result
        assert_eq!(topics.len(), 5);
        // "rust" appears 3 times, should be the top topic
        assert_eq!(topics[0], ("rust".to_string(), 3));
        // Everything else appears once; ties are broken alphabetically
        assert_eq!(topics[1], ("async".to_string(), 1));
    }

    // -------------------------------------------------------------------------
    // Test 7 — empty workspace
    // -------------------------------------------------------------------------
    #[test]
    fn test_empty_workspace() {
        let conn = setup_conn();
        let detector = GapDetector::new();

        let report = detector.analyze_coverage(&conn, "empty-ws").unwrap();

        assert_eq!(report.total_memories, 0);
        assert!(report.topic_distribution.is_empty());
        assert!(report.temporal_gaps.is_empty());
        assert!(report.weak_areas.is_empty());

        let gaps = detector.detect_gaps(&conn, "empty-ws").unwrap();
        assert!(gaps.is_empty());

        let suggestions = detector
            .suggest_acquisitions(&conn, "empty-ws", 10)
            .unwrap();
        assert!(suggestions.is_empty());

        let topics = InterestTracker::new()
            .get_frequent_topics(&conn, "empty-ws", 10)
            .unwrap();
        assert!(topics.is_empty());
    }

    // -------------------------------------------------------------------------
    // Test 8 — gap detection with unresolved questions
    // -------------------------------------------------------------------------
    #[test]
    fn test_gap_detection_with_questions() {
        let conn = setup_conn();

        let id1 = insert_memory(
            &conn,
            "ws",
            "How does tokio handle backpressure?",
            0.7,
            "2024-07-01T00:00:00Z",
            &[],
        );
        let id2 = insert_memory(
            &conn,
            "ws",
            "What is the difference between Arc and Rc?",
            0.6,
            "2024-07-02T00:00:00Z",
            &[],
        );

        let detector = GapDetector::new();
        let gaps = detector.detect_gaps(&conn, "ws").unwrap();

        // No tags and no long pause, so the question gap is the only one
        assert_eq!(gaps.len(), 1);

        let question_gap = gaps.iter().find(|g| g.topic == "unresolved questions");
        assert!(
            question_gap.is_some(),
            "should detect unresolved questions gap"
        );

        let qg = question_gap.unwrap();
        assert_eq!(qg.related_memory_ids, vec![id1, id2]);
        assert!(
            qg.confidence > 0.8 && qg.confidence <= 1.0,
            "confidence for question gaps should be high"
        );
    }

    // -------------------------------------------------------------------------
    // Test 9 — interest tracker respects workspace isolation
    // -------------------------------------------------------------------------
    #[test]
    fn test_interest_tracker_workspace_isolation() {
        let conn = setup_conn();
        let tracker = InterestTracker::new();

        tracker
            .record_query(&conn, "machine learning concepts", "ml-ws")
            .unwrap();
        tracker
            .record_query(&conn, "deep learning tutorial", "ml-ws")
            .unwrap();
        tracker
            .record_query(&conn, "rust ownership", "rust-ws")
            .unwrap();

        let ml_topics = tracker.get_frequent_topics(&conn, "ml-ws", 10).unwrap();
        let rust_topics = tracker.get_frequent_topics(&conn, "rust-ws", 10).unwrap();

        // Each workspace sees its own queries...
        assert_eq!(ml_topics[0], ("learning".to_string(), 2));
        assert!(rust_topics.iter().any(|(w, _)| w == "rust"));

        // ...and nothing from the other one
        assert!(!ml_topics.iter().any(|(w, _)| w == "rust"));
        assert!(!rust_topics.iter().any(|(w, _)| w == "learning"));
    }

    // -------------------------------------------------------------------------
    // Test 10 — suggest acquisitions respects limit
    // -------------------------------------------------------------------------
    #[test]
    fn test_suggest_acquisitions_limit() {
        let conn = setup_conn();

        // Five sparse topics on consecutive days → exactly five priority-2 suggestions
        for i in 0..5 {
            insert_memory(
                &conn,
                "ws",
                &format!("Note about topic {}", i),
                0.5,
                &format!("2024-08-0{}T00:00:00Z", i + 1),
                &[&format!("topic-{}", i)],
            );
        }

        let detector = GapDetector::new();

        let limited = detector.suggest_acquisitions(&conn, "ws", 3).unwrap();
        assert_eq!(limited.len(), 3, "should respect the limit");

        // limit = 0 means "no limit"
        let unlimited = detector.suggest_acquisitions(&conn, "ws", 0).unwrap();
        assert_eq!(unlimited.len(), 5, "limit 0 should return everything");
        assert!(unlimited.iter().all(|s| s.priority == 2));
    }

    // -------------------------------------------------------------------------
    // Test 11 — keyword normalisation (case and punctuation)
    // -------------------------------------------------------------------------
    #[test]
    fn test_frequent_topics_normalizes_case_and_punctuation() {
        let conn = setup_conn();
        let tracker = InterestTracker::new();

        tracker
            .record_query(&conn, "Rust, rust! (RUST)?", "ws")
            .unwrap();

        let topics = tracker.get_frequent_topics(&conn, "ws", 0).unwrap();
        assert_eq!(topics, vec![("rust".to_string(), 3)]);
    }

    // -------------------------------------------------------------------------
    // Test 12 — stop words and very short words are ignored
    // -------------------------------------------------------------------------
    #[test]
    fn test_frequent_topics_skips_stop_and_short_words() {
        let conn = setup_conn();
        let tracker = InterestTracker::new();

        tracker.record_query(&conn, "the art of war", "ws").unwrap();

        let topics = tracker.get_frequent_topics(&conn, "ws", 0).unwrap();
        assert_eq!(
            topics,
            vec![("art".to_string(), 1), ("war".to_string(), 1)]
        );
    }
}
931}