use chrono::Utc;
use rusqlite::{params, Connection};
use serde::{Deserialize, Serialize};
use crate::error::Result;
/// Summary of how well a workspace's memories cover its topics and timeline.
///
/// Produced by `GapDetector::analyze_coverage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageReport {
/// Number of rows in `memories` that belong to the workspace.
pub total_memories: i64,
/// `(tag, distinct memory count)` pairs, highest count first.
pub topic_distribution: Vec<(String, i64)>,
/// Intervals of more than seven days between consecutive memories.
pub temporal_gaps: Vec<TemporalGap>,
/// Tags with fewer than three memories or an average importance below 0.3.
pub weak_areas: Vec<WeakArea>,
}
/// A stretch of the timeline between two consecutive memories with nothing recorded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalGap {
/// `created_at` value of the memory that precedes the gap, as stored.
pub from: String,
/// `created_at` value of the memory that follows the gap, as stored.
pub to: String,
/// Length of the gap in days (whole seconds divided by 86 400).
pub gap_days: f64,
}
/// A tag whose coverage is thin or whose memories carry little importance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WeakArea {
/// The tag this entry describes.
pub topic: String,
/// Number of distinct memories carrying the tag.
pub memory_count: i64,
/// Average `importance` of the tagged memories (a NULL importance counts as 0.5).
pub avg_importance: f32,
/// Human-readable advice on how to strengthen the area.
pub suggestion: String,
}
/// A single gap reported by `GapDetector::detect_gaps`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeGap {
/// Short label: a tag name, a "temporal gap (N days)" label, or "unresolved questions".
pub topic: String,
/// Fixed per-heuristic confidence: 0.7 for sparse tags, 0.5 for temporal gaps,
/// 0.9 for unresolved questions.
pub confidence: f32,
/// Human-readable advice on how to close the gap.
pub suggestion: String,
/// Ids of the memories that triggered the gap; empty for temporal gaps.
pub related_memory_ids: Vec<i64>,
}
/// A recommendation for new content, produced by `GapDetector::suggest_acquisitions`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcquisitionSuggestion {
/// Description of what the new memory should contain.
pub content_hint: String,
/// Kind of memory to create; every suggestion built in this file uses "note".
pub suggested_type: String,
/// Rank used for sorting, lower first: 1 = open questions, 2 = sparse tags,
/// 3 = temporal gaps.
pub priority: u8,
/// Explanation of why the suggestion was made.
pub reason: String,
}
/// DDL that creates the `query_log` table and its workspace and timestamp indexes.
///
/// Every statement uses `IF NOT EXISTS`. `InterestTracker::record_query` inserts
/// into this table and `InterestTracker::get_frequent_topics` reads from it.
pub const CREATE_QUERY_LOG_TABLE: &str = r#"
CREATE TABLE IF NOT EXISTS query_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
query TEXT NOT NULL,
workspace TEXT NOT NULL DEFAULT 'default',
timestamp TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
);
CREATE INDEX IF NOT EXISTS idx_query_log_workspace ON query_log(workspace);
CREATE INDEX IF NOT EXISTS idx_query_log_timestamp ON query_log(timestamp);
"#;
/// Stateless analyzer that inspects the `memories` and `tags` tables of a workspace
/// for sparse topics, timeline gaps, and unanswered questions.
pub struct GapDetector;
impl GapDetector {
pub fn new() -> Self {
Self
}
/// Builds a [`CoverageReport`] for `workspace`.
///
/// Runs, in order: a row count over `memories`, the per-tag distribution, the
/// temporal-gap scan, and the weak-area scan.
///
/// # Errors
/// Returns the first database error raised by any of those queries.
pub fn analyze_coverage(&self, conn: &Connection, workspace: &str) -> Result<CoverageReport> {
    // Field initialisers are evaluated top to bottom, so the queries run in
    // the order listed in the doc comment.
    let report = CoverageReport {
        total_memories: conn.query_row(
            "SELECT COUNT(*) FROM memories WHERE workspace = ?1",
            params![workspace],
            |row| row.get::<_, i64>(0),
        )?,
        topic_distribution: self.count_memories_per_tag(conn, workspace)?,
        temporal_gaps: self.find_temporal_gaps(conn, workspace)?,
        weak_areas: self.find_weak_areas(conn, workspace)?,
    };
    Ok(report)
}
/// Lists the knowledge gaps found in `workspace`.
///
/// Three heuristics contribute entries, appended in this order:
/// 1. every tag with fewer than three memories (confidence 0.7, with the ids of
///    the tagged memories),
/// 2. every temporal gap reported by `find_temporal_gaps` (confidence 0.5, no ids),
/// 3. a single "unresolved questions" entry when any memory content contains a
///    `?` (confidence 0.9, with the ids of those memories).
///
/// # Errors
/// Returns the first database error raised by the underlying queries.
pub fn detect_gaps(&self, conn: &Connection, workspace: &str) -> Result<Vec<KnowledgeGap>> {
    let mut found: Vec<KnowledgeGap> = Vec::new();

    // Heuristic 1: sparsely covered tags.
    for (tag, count) in self.count_memories_per_tag(conn, workspace)? {
        if count >= 3 {
            continue;
        }
        let related_memory_ids = self.memory_ids_for_tag(conn, workspace, &tag)?;
        let suggestion = format!(
            "Only {} memory/memories tagged '{}'. Consider adding more detail.",
            count, tag
        );
        found.push(KnowledgeGap {
            topic: tag,
            confidence: 0.7,
            suggestion,
            related_memory_ids,
        });
    }

    // Heuristic 2: long silences in the timeline.
    let silences = self.find_temporal_gaps(conn, workspace)?;
    found.extend(silences.iter().map(|gap| KnowledgeGap {
        topic: format!("temporal gap ({:.1} days)", gap.gap_days),
        confidence: 0.5,
        suggestion: format!(
            "No memories were created for {:.1} days between {} and {}. \
             Consider adding a summary of what happened during this period.",
            gap.gap_days, gap.from, gap.to
        ),
        related_memory_ids: Vec::new(),
    }));

    // Heuristic 3: memories that look like open questions.
    let question_ids = self.find_question_memory_ids(conn, workspace)?;
    if question_ids.is_empty() {
        return Ok(found);
    }
    found.push(KnowledgeGap {
        topic: String::from("unresolved questions"),
        confidence: 0.9,
        suggestion: format!(
            "{} memory/memories contain unresolved questions. \
             Recording answers will improve your knowledge base.",
            question_ids.len()
        ),
        related_memory_ids: question_ids,
    });
    Ok(found)
}
/// Suggests content to add to `workspace`, most urgent first.
///
/// Suggestions are generated for open questions (priority 1), tags with fewer
/// than three memories (priority 2), and temporal gaps (priority 3). They are
/// then stably sorted by priority, so entries of equal priority keep their
/// generation order.
///
/// `limit` caps the number of suggestions returned; a `limit` of `0` means
/// "no cap".
///
/// # Errors
/// Returns the first database error raised by the underlying queries.
pub fn suggest_acquisitions(
    &self,
    conn: &Connection,
    workspace: &str,
    limit: usize,
) -> Result<Vec<AcquisitionSuggestion>> {
    let mut ranked: Vec<AcquisitionSuggestion> = Vec::new();

    // Priority 1: one aggregate suggestion covering every open question.
    let question_ids = self.find_question_memory_ids(conn, workspace)?;
    let open_questions = question_ids.len();
    if open_questions > 0 {
        ranked.push(AcquisitionSuggestion {
            content_hint: format!(
                "Answer the {} outstanding question(s) stored in memories {:?}",
                open_questions, question_ids
            ),
            suggested_type: String::from("note"),
            priority: 1,
            reason: format!(
                "{} memories contain unanswered questions; capturing answers closes these gaps.",
                open_questions
            ),
        });
    }

    // Priority 2: one suggestion per under-represented tag.
    let sparse_tags = self
        .count_memories_per_tag(conn, workspace)?
        .into_iter()
        .filter(|(_, count)| *count < 3);
    for (tag, count) in sparse_tags {
        ranked.push(AcquisitionSuggestion {
            content_hint: format!(
                "Add more information about '{}' (currently only {} memory/memories).",
                tag, count
            ),
            suggested_type: String::from("note"),
            priority: 2,
            reason: format!(
                "The topic '{}' is under-represented with only {} entry/entries.",
                tag, count
            ),
        });
    }

    // Priority 3: one suggestion per silence in the timeline.
    for gap in self.find_temporal_gaps(conn, workspace)? {
        ranked.push(AcquisitionSuggestion {
            content_hint: format!(
                "Write a summary of events that occurred between {} and {} ({:.1} days).",
                gap.from, gap.to, gap.gap_days
            ),
            suggested_type: String::from("note"),
            priority: 3,
            reason: format!(
                "There is a {:.1}-day gap in your memory timeline with no recorded events.",
                gap.gap_days
            ),
        });
    }

    ranked.sort_by_key(|suggestion| suggestion.priority);
    if limit != 0 {
        ranked.truncate(limit);
    }
    Ok(ranked)
}
/// Returns `(tag, distinct memory count)` for every tag used in `workspace`.
///
/// Rows are ordered by count, highest first. Tags with equal counts are ordered
/// alphabetically so the result is deterministic: callers truncate lists derived
/// from this order, and without a tie-break SQLite may return ties in any order.
///
/// # Errors
/// Returns any database error raised while preparing or running the query.
fn count_memories_per_tag(
    &self,
    conn: &Connection,
    workspace: &str,
) -> Result<Vec<(String, i64)>> {
    let mut stmt = conn.prepare(
        "SELECT t.tag, COUNT(DISTINCT t.memory_id) AS cnt\n\
         FROM tags t\n\
         JOIN memories m ON m.id = t.memory_id\n\
         WHERE m.workspace = ?1\n\
         GROUP BY t.tag\n\
         ORDER BY cnt DESC, t.tag ASC",
    )?;
    let counts = stmt
        .query_map(params![workspace], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })?
        .collect::<rusqlite::Result<Vec<(String, i64)>>>()?;
    Ok(counts)
}
/// Finds intervals of more than seven days between consecutive memories in `workspace`.
///
/// `created_at` values are parsed as RFC 3339. Values that fail to parse are
/// dropped *before* neighbours are paired, so one malformed row cannot hide the
/// gap between the valid rows on either side of it. The remaining timestamps
/// are ordered by the instant they denote (not by their text), which keeps the
/// pairing chronological even when rows use different UTC offsets or fractional
/// seconds.
///
/// The returned `from` / `to` strings are the stored values, unmodified.
///
/// # Errors
/// Returns any database error raised while preparing or running the query.
fn find_temporal_gaps(&self, conn: &Connection, workspace: &str) -> Result<Vec<TemporalGap>> {
    const MIN_GAP_DAYS: f64 = 7.0;
    const SECONDS_PER_DAY: f64 = 86_400.0;

    let mut stmt = conn.prepare(
        "SELECT created_at FROM memories\n\
         WHERE workspace = ?1\n\
         ORDER BY created_at ASC",
    )?;
    let raw_timestamps = stmt
        .query_map(params![workspace], |row| row.get::<_, String>(0))?
        .collect::<rusqlite::Result<Vec<String>>>()?;

    // Keep each stored string next to its parsed instant; skip unparseable rows.
    let mut timeline: Vec<(String, chrono::DateTime<chrono::FixedOffset>)> = raw_timestamps
        .into_iter()
        .filter_map(|raw| {
            let parsed = chrono::DateTime::parse_from_rfc3339(&raw).ok()?;
            Some((raw, parsed))
        })
        .collect();
    // Stable sort: rows denoting the same instant keep their SQL order.
    timeline.sort_by(|a, b| a.1.cmp(&b.1));

    let mut gaps = Vec::new();
    for pair in timeline.windows(2) {
        let (from, from_dt) = &pair[0];
        let (to, to_dt) = &pair[1];
        let gap_days = (*to_dt - *from_dt).num_seconds() as f64 / SECONDS_PER_DAY;
        if gap_days > MIN_GAP_DAYS {
            gaps.push(TemporalGap {
                from: from.clone(),
                to: to.clone(),
                gap_days,
            });
        }
    }
    Ok(gaps)
}
/// Lists the tags in `workspace` that are thinly covered or of low importance.
///
/// A tag qualifies when it has fewer than three distinct memories, or when the
/// average importance of its memories (NULL counted as 0.5) is below 0.3. When
/// both hold, the suggestion text reports the low count.
///
/// # Errors
/// Returns any database error raised while preparing or running the query.
fn find_weak_areas(&self, conn: &Connection, workspace: &str) -> Result<Vec<WeakArea>> {
    let mut stmt = conn.prepare(
        "SELECT t.tag,\n\
         COUNT(DISTINCT t.memory_id) AS cnt,\n\
         AVG(COALESCE(m.importance, 0.5)) AS avg_imp\n\
         FROM tags t\n\
         JOIN memories m ON m.id = t.memory_id\n\
         WHERE m.workspace = ?1\n\
         GROUP BY t.tag",
    )?;
    let per_tag = stmt.query_map(params![workspace], |row| {
        let tag: String = row.get(0)?;
        let memory_count: i64 = row.get(1)?;
        let mean_importance: f64 = row.get(2)?;
        Ok((tag, memory_count, mean_importance))
    })?;

    let mut weak_areas: Vec<WeakArea> = Vec::new();
    for entry in per_tag {
        let (topic, memory_count, mean_importance) = entry?;
        let avg_importance = mean_importance as f32;
        let too_few = memory_count < 3;
        let low_importance = avg_importance < 0.3;
        if !(too_few || low_importance) {
            continue;
        }

        // A low count takes precedence when choosing the message.
        let suggestion = match too_few {
            true => format!(
                "Only {} memory/memories about '{}'. Expand coverage.",
                memory_count, topic
            ),
            false => format!(
                "Memories about '{}' have low average importance ({:.2}). \
                 Review and update their relevance.",
                topic, avg_importance
            ),
        };
        weak_areas.push(WeakArea {
            topic,
            memory_count,
            avg_importance,
            suggestion,
        });
    }
    Ok(weak_areas)
}
/// Returns the ids of the memories in `workspace` that carry `tag`, ascending.
///
/// Each id appears once even if the `tags` table holds the same
/// `(memory_id, tag)` pair more than once. This keeps the list consistent with
/// the per-tag counts, which use `COUNT(DISTINCT memory_id)`.
///
/// # Errors
/// Returns any database error raised while preparing or running the query.
fn memory_ids_for_tag(
    &self,
    conn: &Connection,
    workspace: &str,
    tag: &str,
) -> Result<Vec<i64>> {
    let mut stmt = conn.prepare(
        "SELECT DISTINCT t.memory_id FROM tags t\n\
         JOIN memories m ON m.id = t.memory_id\n\
         WHERE m.workspace = ?1 AND t.tag = ?2\n\
         ORDER BY t.memory_id ASC",
    )?;
    let ids = stmt
        .query_map(params![workspace, tag], |row| row.get::<_, i64>(0))?
        .collect::<rusqlite::Result<Vec<i64>>>()?;
    Ok(ids)
}
/// Returns the ids of memories in `workspace` whose content contains a `?`, ascending.
///
/// The `?` in the `LIKE` pattern sits inside a quoted SQL string, so it is a
/// literal question mark; the statement's only bind parameter is `?1`.
///
/// # Errors
/// Returns any database error raised while preparing or running the query.
fn find_question_memory_ids(&self, conn: &Connection, workspace: &str) -> Result<Vec<i64>> {
    let sql = "SELECT id FROM memories\n\
               WHERE workspace = ?1 AND content LIKE '%?%'\n\
               ORDER BY id ASC";
    let mut stmt = conn.prepare(sql)?;
    let rows = stmt.query_map(params![workspace], |row| row.get::<_, i64>(0))?;

    let mut question_ids = Vec::new();
    for id in rows {
        question_ids.push(id?);
    }
    Ok(question_ids)
}
}
impl Default for GapDetector {
fn default() -> Self {
Self::new()
}
}
/// Stateless helper that logs search queries to `query_log` and derives the
/// most frequently queried words from that log.
pub struct InterestTracker;
impl InterestTracker {
pub fn new() -> Self {
Self
}
/// Appends `query` to the `query_log` table for `workspace`.
///
/// The row is stamped with the current UTC time, formatted as
/// `YYYY-MM-DDTHH:MM:SSZ`.
///
/// # Errors
/// Returns any database error raised by the insert.
pub fn record_query(&self, conn: &Connection, query: &str, workspace: &str) -> Result<()> {
    const INSERT_SQL: &str =
        "INSERT INTO query_log (query, workspace, timestamp) VALUES (?1, ?2, ?3)";

    let logged_at = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
    conn.execute(INSERT_SQL, params![query, workspace, logged_at])?;
    Ok(())
}
/// Returns the words that occur most often in the queries logged for `workspace`.
///
/// Each query is split on whitespace. Every piece is lower-cased and stripped
/// of leading and trailing non-alphanumeric characters. Words of two bytes or
/// fewer (the check uses `str::len`, i.e. UTF-8 byte length) and common English
/// stop words are ignored.
///
/// The result is sorted by count (highest first), then alphabetically.
/// `limit` caps the number of entries; a `limit` of `0` means "no cap".
///
/// # Errors
/// Returns any database error raised while reading the log.
pub fn get_frequent_topics(
    &self,
    conn: &Connection,
    workspace: &str,
    limit: usize,
) -> Result<Vec<(String, i64)>> {
    use std::collections::{HashMap, HashSet};

    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "have", "has", "had", "do",
        "does", "did", "will", "would", "could", "should", "this", "that", "and", "but", "or",
        "if", "in", "on", "at", "by", "to", "of", "for", "with", "from", "as", "it", "its",
        "not", "no",
    ];

    let mut stmt = conn.prepare("SELECT query FROM query_log WHERE workspace = ?1")?;
    let logged = stmt
        .query_map(params![workspace], |row| row.get::<_, String>(0))?
        .collect::<rusqlite::Result<Vec<String>>>()?;

    let ignored: HashSet<&str> = STOP_WORDS.iter().copied().collect();
    let mut tally: HashMap<String, i64> = HashMap::new();
    for token in logged.iter().flat_map(|query| query.split_whitespace()) {
        let lowered = token.to_lowercase();
        let term = lowered.trim_matches(|c: char| !c.is_alphanumeric());
        if term.len() <= 2 || ignored.contains(term) {
            continue;
        }
        *tally.entry(term.to_string()).or_default() += 1;
    }

    let mut ranked: Vec<(String, i64)> = tally.into_iter().collect();
    ranked.sort_by(|(left_word, left_count), (right_word, right_count)| {
        right_count
            .cmp(left_count)
            .then_with(|| left_word.cmp(right_word))
    });
    if limit != 0 {
        ranked.truncate(limit);
    }
    Ok(ranked)
}
}
impl Default for InterestTracker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use rusqlite::Connection;
// Opens an in-memory database holding the `memories`, `tags`, and `query_log`
// tables the tests rely on.
fn setup_conn() -> Connection {
    const MEMORY_SCHEMA: &str = "CREATE TABLE IF NOT EXISTS memories (\n\
        id INTEGER PRIMARY KEY AUTOINCREMENT,\n\
        content TEXT NOT NULL,\n\
        workspace TEXT NOT NULL DEFAULT 'default',\n\
        importance REAL NOT NULL DEFAULT 0.5,\n\
        created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),\n\
        updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\n\
        );\n\
        CREATE TABLE IF NOT EXISTS tags (\n\
        id INTEGER PRIMARY KEY AUTOINCREMENT,\n\
        memory_id INTEGER NOT NULL,\n\
        tag TEXT NOT NULL,\n\
        FOREIGN KEY(memory_id) REFERENCES memories(id)\n\
        );";

    let db = Connection::open_in_memory().unwrap();
    db.execute_batch(MEMORY_SCHEMA).unwrap();
    db.execute_batch(CREATE_QUERY_LOG_TABLE).unwrap();
    db
}
// Inserts one memory (with `updated_at` equal to `created_at`) plus one `tags`
// row per entry in `tags`, and returns the new memory's row id.
fn insert_memory(
    conn: &Connection,
    workspace: &str,
    content: &str,
    importance: f32,
    created_at: &str,
    tags: &[&str],
) -> i64 {
    conn.execute(
        "INSERT INTO memories (content, workspace, importance, created_at, updated_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?4)",
        params![content, workspace, importance, created_at],
    )
    .unwrap();

    let memory_id = conn.last_insert_rowid();
    for &label in tags {
        conn.execute(
            "INSERT INTO tags (memory_id, tag) VALUES (?1, ?2)",
            params![memory_id, label],
        )
        .unwrap();
    }
    memory_id
}
// Three memories tagged "rust" and one tagged "python": the report must count
// all four memories, attribute three to "rust", and flag "python" as weak.
#[test]
fn test_coverage_report_with_varied_data() {
    let conn = setup_conn();
    let fixtures: [(&str, f32, &str, &[&str]); 4] = [
        ("Rust basics", 0.8, "2024-01-01T00:00:00Z", &["rust", "programming"]),
        ("Rust lifetimes", 0.7, "2024-01-02T00:00:00Z", &["rust"]),
        ("Rust traits", 0.9, "2024-01-03T00:00:00Z", &["rust"]),
        ("Python basics", 0.5, "2024-01-04T00:00:00Z", &["python"]),
    ];
    for (content, importance, created_at, tags) in fixtures {
        insert_memory(&conn, "ws", content, importance, created_at, tags);
    }

    let report = GapDetector::new().analyze_coverage(&conn, "ws").unwrap();
    assert_eq!(report.total_memories, 4);

    let rust_count = report
        .topic_distribution
        .iter()
        .find_map(|(tag, n)| if tag == "rust" { Some(*n) } else { None })
        .unwrap_or(0);
    assert_eq!(rust_count, 3);

    let python_weak = report.weak_areas.iter().any(|area| area.topic == "python");
    assert!(python_weak, "python should be a weak area (only 1 memory)");
}
// Two notes created 20 days apart must produce a temporal gap of about 20 days.
#[test]
fn test_temporal_gap_detection() {
    let conn = setup_conn();
    let notes = [
        ("Note A", "2024-03-01T00:00:00Z"),
        ("Note B", "2024-03-21T00:00:00Z"),
    ];
    for (content, created_at) in notes {
        insert_memory(&conn, "ws", content, 0.5, created_at, &[]);
    }

    let report = GapDetector::new().analyze_coverage(&conn, "ws").unwrap();
    assert!(
        !report.temporal_gaps.is_empty(),
        "should detect a 20-day gap"
    );
    let gap = &report.temporal_gaps[0];
    assert!(gap.gap_days > 19.0 && gap.gap_days < 21.0);
}
// Two notes only four days apart stay under the seven-day threshold.
#[test]
fn test_no_temporal_gap_within_7_days() {
    let conn = setup_conn();
    let notes = [
        ("Note A", "2024-03-01T00:00:00Z"),
        ("Note B", "2024-03-05T00:00:00Z"),
    ];
    for (content, created_at) in notes {
        insert_memory(&conn, "ws", content, 0.5, created_at, &[]);
    }

    let report = GapDetector::new().analyze_coverage(&conn, "ws").unwrap();
    assert!(
        report.temporal_gaps.is_empty(),
        "4-day gap should not be reported"
    );
}
// A tag with three low-importance memories is weak by importance; a tag with a
// single high-importance memory is still weak by count.
#[test]
fn test_weak_area_low_importance() {
    let conn = setup_conn();
    for day in 1..=3 {
        let content = format!("Low importance note {}", day - 1);
        let created_at = format!("2024-05-0{}T00:00:00Z", day);
        insert_memory(&conn, "ws", &content, 0.1, &created_at, &["low-imp"]);
    }
    insert_memory(
        &conn,
        "ws",
        "Important note",
        0.9,
        "2024-05-10T00:00:00Z",
        &["high-imp"],
    );

    let report = GapDetector::new().analyze_coverage(&conn, "ws").unwrap();
    let weak = &report.weak_areas;

    let low_imp_weak = weak
        .iter()
        .any(|area| area.topic == "low-imp" && area.avg_importance < 0.3);
    assert!(low_imp_weak, "low-imp should be flagged as weak area");

    let high_imp_weak = weak.iter().any(|area| area.topic == "high-imp");
    assert!(
        high_imp_weak,
        "high-imp has only 1 memory, still a weak area by count"
    );
}
// With an open question, a sparse tag, and a two-month gap all present, the
// suggestions must start at priority 1 and never decrease in priority.
#[test]
fn test_suggest_acquisitions_priority_order() {
    let conn = setup_conn();
    let fixtures: [(&str, &str, &[&str]); 4] = [
        ("What is the best caching strategy?", "2024-06-01T00:00:00Z", &[]),
        ("Note about caching", "2024-06-02T00:00:00Z", &["caching"]),
        ("Note before gap", "2024-01-01T00:00:00Z", &[]),
        ("Note after gap", "2024-03-01T00:00:00Z", &[]),
    ];
    for (content, created_at, tags) in fixtures {
        insert_memory(&conn, "ws", content, 0.5, created_at, tags);
    }

    let suggestions = GapDetector::new()
        .suggest_acquisitions(&conn, "ws", 10)
        .unwrap();
    assert!(!suggestions.is_empty());
    assert_eq!(
        suggestions[0].priority, 1,
        "first suggestion should be priority 1 (unresolved question)"
    );
    for (earlier, later) in suggestions.iter().zip(suggestions.iter().skip(1)) {
        assert!(
            earlier.priority <= later.priority,
            "suggestions should be sorted by priority ascending"
        );
    }
}
// "rust" appears in three of the four logged queries and must be counted three times.
#[test]
fn test_interest_tracking() {
    let conn = setup_conn();
    let tracker = InterestTracker::new();
    let queries = [
        "rust async programming",
        "rust error handling",
        "rust lifetimes",
        "python web frameworks",
    ];
    for query in queries {
        tracker.record_query(&conn, query, "ws").unwrap();
    }

    let topics = tracker.get_frequent_topics(&conn, "ws", 5).unwrap();
    assert!(!topics.is_empty());

    let rust_hits = topics
        .iter()
        .find(|(word, _)| word == "rust")
        .map(|(_, hits)| *hits);
    assert!(
        rust_hits.is_some(),
        "rust should appear in frequent topics"
    );
    assert_eq!(rust_hits.unwrap(), 3, "rust should have count 3");
}
// A workspace with no memories yields an empty report, no gaps, and no suggestions.
#[test]
fn test_empty_workspace() {
    let conn = setup_conn();
    let detector = GapDetector::new();
    let workspace = "empty-ws";

    let report = detector.analyze_coverage(&conn, workspace).unwrap();
    assert_eq!(report.total_memories, 0);
    assert!(report.topic_distribution.is_empty());
    assert!(report.temporal_gaps.is_empty());
    assert!(report.weak_areas.is_empty());

    let gaps = detector.detect_gaps(&conn, workspace).unwrap();
    assert!(gaps.is_empty());

    let suggestions = detector
        .suggest_acquisitions(&conn, workspace, 10)
        .unwrap();
    assert!(suggestions.is_empty());
}
// Two memories phrased as questions must surface as one high-confidence
// "unresolved questions" gap that references both ids.
#[test]
fn test_gap_detection_with_questions() {
    let conn = setup_conn();
    let tokio_id = insert_memory(
        &conn,
        "ws",
        "How does tokio handle backpressure?",
        0.7,
        "2024-07-01T00:00:00Z",
        &[],
    );
    let arc_id = insert_memory(
        &conn,
        "ws",
        "What is the difference between Arc and Rc?",
        0.6,
        "2024-07-02T00:00:00Z",
        &[],
    );

    let gaps = GapDetector::new().detect_gaps(&conn, "ws").unwrap();
    let question_gap = gaps
        .iter()
        .find(|gap| gap.topic == "unresolved questions");
    assert!(
        question_gap.is_some(),
        "should detect unresolved questions gap"
    );

    let qg = question_gap.unwrap();
    for id in [tokio_id, arc_id] {
        assert!(qg.related_memory_ids.contains(&id));
    }
    assert!(
        qg.confidence > 0.8,
        "confidence for question gaps should be high"
    );
}
// Queries logged under one workspace must not leak into another workspace's topics.
#[test]
fn test_interest_tracker_workspace_isolation() {
    let conn = setup_conn();
    let tracker = InterestTracker::new();
    let logged = [
        ("machine learning concepts", "ml-ws"),
        ("deep learning tutorial", "ml-ws"),
        ("rust ownership", "rust-ws"),
    ];
    for (query, workspace) in logged {
        tracker.record_query(&conn, query, workspace).unwrap();
    }

    let ml_topics = tracker.get_frequent_topics(&conn, "ml-ws", 10).unwrap();
    let rust_topics = tracker.get_frequent_topics(&conn, "rust-ws", 10).unwrap();
    assert!(!ml_topics.iter().any(|(w, _)| w == "rust"));
    assert!(!rust_topics.iter().any(|(w, _)| w == "learning"));
}
// Five sparse tags would produce five suggestions; a limit of 3 must cap the result.
#[test]
fn test_suggest_acquisitions_limit() {
    let conn = setup_conn();
    for i in 0..5 {
        let content = format!("Note about topic {}", i);
        let created_at = format!("2024-08-0{}T00:00:00Z", i + 1);
        let tag = format!("topic-{}", i);
        insert_memory(&conn, "ws", &content, 0.5, &created_at, &[tag.as_str()]);
    }

    let suggestions = GapDetector::new()
        .suggest_acquisitions(&conn, "ws", 3)
        .unwrap();
    assert!(suggestions.len() <= 3, "should respect the limit");
}
}