Skip to main content

tj_core/
recall.rs

1//! Read-only proactive-recall engine. Given the current tool context,
2//! return the most relevant prior confirmed `rejection`/`decision` events
3//! so the agent doesn't re-walk a ruled-out path. Shared by the PostToolUse
4//! push path (claude-memory-60m) and the MCP-output push path (7km).
5
6use crate::event::EventType;
7
8/// One recalled high-signal event that matched the current context.
9#[derive(Debug, Clone, PartialEq)]
10pub struct RecallHit {
11    pub task_id: String,
12    pub event_type: EventType, // Rejection | Decision
13    pub text: String,
14    pub score: f64,
15}
16
/// Default cap on how many hits a single recall call surfaces.
/// Chosen autonomously as a starting value and flagged for review.
pub const DEFAULT_MAX_HITS: usize = 2;
/// Lowest blended score an event must reach to be surfaced.
/// Chosen autonomously as a starting value and flagged for review.
pub const RELEVANCE_THRESHOLD: f64 = 1.0;
21
/// Common, low-signal words dropped from the OR-token query. Without this,
/// a shared stopword like "the" between an unrelated tool call and a prior
/// rejection scores a spurious hit. Kept deliberately small — just the
/// high-frequency glue words plus the noise tokens that leak in from the
/// synthesized tool-call JSON (`Bash: {"command": …}`).
const STOPWORDS: &[&str] = &[
    "the", "and", "for", "with", "you", "are", "was", "but", "not", "this", "that", "from", "have",
    "has", "had", "will", "your", "our", "out", "let", "lets", "command", "output", "input",
    "tool", "bash", "name", "response",
];

/// Build an FTS5 OR-of-tokens query from a free-text context string.
///
/// A raw multi-word context like "let's switch to axum" parses as an implicit
/// AND under FTS5, so it would never match a short rejection that only shares
/// one token. Instead the text is split on every non-alphanumeric character,
/// tokens shorter than three characters are dropped, the rest are lowercased,
/// stopwords are removed, and the survivors are joined with ` OR ` so any
/// shared *meaningful* keyword scores a hit.
///
/// Repeated tokens are emitted only once, in first-seen order: duplicates
/// cannot change which rows an OR query matches, but repetitive tool-call
/// context would otherwise inflate the MATCH expression for no benefit.
///
/// Returns `None` when no usable token survives (the caller then falls back
/// to a raw LIKE on the query).
fn fts_or_query(query_text: &str) -> Option<String> {
    let mut seen = std::collections::HashSet::new();
    let mut tokens: Vec<String> = Vec::new();
    for raw in query_text.split(|c: char| !c.is_alphanumeric()) {
        // Length is measured in characters, not bytes, so short non-ASCII
        // words are treated the same as short ASCII ones.
        if raw.chars().count() < 3 {
            continue;
        }
        let token = raw.to_lowercase();
        if STOPWORDS.contains(&token.as_str()) {
            continue;
        }
        // `insert` returns false for a token that was already emitted.
        if seen.insert(token.clone()) {
            tokens.push(token);
        }
    }
    if tokens.is_empty() {
        None
    } else {
        Some(tokens.join(" OR "))
    }
}
53
54/// Search confirmed `rejection`/`decision` events for ones relevant to the
55/// current context, blending an FTS5/LIKE text signal with artifact overlap.
56///
57/// Read-only: never mutates the JSONL log or any derived table. Returns at
58/// most `max_hits` hits scoring >= [`RELEVANCE_THRESHOLD`], sorted by score
59/// descending with `Rejection` winning ties over `Decision`.
60pub fn relevant_recall(
61    conn: &rusqlite::Connection,
62    query_text: &str,
63    max_hits: usize,
64) -> anyhow::Result<Vec<RecallHit>> {
65    use std::collections::HashMap;
66    if query_text.trim().is_empty() {
67        return Ok(Vec::new());
68    }
69
70    // score keyed by event_id; carry (task_id, type, text) for output.
71    let mut scores: HashMap<String, f64> = HashMap::new();
72    let mut meta: HashMap<String, (String, EventType, String)> = HashMap::new();
73
74    // 1) Text signal: FTS5 OR-of-tokens MATCH, restricted to confirmed
75    //    rejection/decision (mirrors run_rejected's join). The tokenizer
76    //    strips all punctuation, so the resulting `a OR b OR c` query is
77    //    always FTS-safe even when the raw context is noisy tool-call JSON
78    //    (`Bash: {"command":"…"}`) full of `:`/`"`/`{}` that would otherwise
79    //    trip the FTS5 parser. Falls back to a raw LIKE substring only when
80    //    no usable token survives — the same fallback shape run_search uses.
81    let fts_or = fts_or_query(query_text);
82    let use_fts = fts_or.is_some();
83    let sql = if use_fts {
84        "SELECT ei.event_id, ei.task_id, ei.type, sf.text
85         FROM events_index ei
86         JOIN search_fts sf ON sf.event_id = ei.event_id
87         WHERE ei.status = 'confirmed'
88           AND ei.type IN ('rejection','decision')
89           AND search_fts MATCH ?1"
90    } else {
91        "SELECT ei.event_id, ei.task_id, ei.type, sf.text
92         FROM events_index ei
93         JOIN search_fts sf ON sf.event_id = ei.event_id
94         WHERE ei.status = 'confirmed'
95           AND ei.type IN ('rejection','decision')
96           AND sf.text LIKE ?1"
97    };
98    let bind = if let Some(or_query) = fts_or {
99        or_query
100    } else {
101        crate::fts::like_pattern(query_text)
102    };
103    if let Ok(mut stmt) = conn.prepare(sql) {
104        let rows = stmt.query_map(rusqlite::params![bind], |r| {
105            Ok((
106                r.get::<_, String>(0)?,
107                r.get::<_, String>(1)?,
108                r.get::<_, String>(2)?,
109                r.get::<_, String>(3)?,
110            ))
111        });
112        if let Ok(rows) = rows {
113            for row in rows.flatten() {
114                let (eid, tid, ty, text) = row;
115                let et = parse_type(&ty);
116                *scores.entry(eid.clone()).or_insert(0.0) += 1.0; // text-match weight
117                meta.entry(eid).or_insert((tid, et, text));
118            }
119        }
120    }
121
122    // 2) Artifact signal: overlap of artifacts::extract(query_text) against
123    //    events_index.artifacts (mirrors find_related_tasks LIKE scan), same
124    //    confirmed rejection/decision restriction. +weight per shared artifact.
125    let arts = crate::artifacts::extract(query_text);
126    for needle in arts
127        .linked_issues
128        .iter()
129        .chain(arts.commit_hashes.iter())
130        .chain(arts.files.iter())
131    {
132        let pattern = format!("%\"{}\"%", needle.replace('%', "\\%"));
133        if let Ok(mut stmt) = conn.prepare(
134            "SELECT ei.event_id, ei.task_id, ei.type, sf.text
135             FROM events_index ei
136             JOIN search_fts sf ON sf.event_id = ei.event_id
137             WHERE ei.status = 'confirmed'
138               AND ei.type IN ('rejection','decision')
139               AND ei.artifacts LIKE ?1",
140        ) {
141            let rows = stmt.query_map(rusqlite::params![pattern], |r| {
142                Ok((
143                    r.get::<_, String>(0)?,
144                    r.get::<_, String>(1)?,
145                    r.get::<_, String>(2)?,
146                    r.get::<_, String>(3)?,
147                ))
148            });
149            if let Ok(rows) = rows {
150                for row in rows.flatten() {
151                    let (eid, tid, ty, text) = row;
152                    let et = parse_type(&ty);
153                    *scores.entry(eid.clone()).or_insert(0.0) += 0.5; // artifact weight
154                    meta.entry(eid).or_insert((tid, et, text));
155                }
156            }
157        }
158    }
159
160    // 3) Threshold + rank. Sort by score desc; tie → Rejection before Decision.
161    let mut hits: Vec<RecallHit> = scores
162        .into_iter()
163        .filter(|(_, s)| *s >= RELEVANCE_THRESHOLD)
164        .filter_map(|(eid, score)| {
165            meta.remove(&eid)
166                .map(|(task_id, event_type, text)| RecallHit {
167                    task_id,
168                    event_type,
169                    text,
170                    score,
171                })
172        })
173        .collect();
174    hits.sort_by(|a, b| {
175        b.score
176            .partial_cmp(&a.score)
177            .unwrap_or(std::cmp::Ordering::Equal)
178            .then_with(|| rank(a.event_type).cmp(&rank(b.event_type)))
179    });
180    hits.truncate(max_hits);
181    Ok(hits)
182}
183
184fn parse_type(s: &str) -> EventType {
185    match s {
186        "rejection" => EventType::Rejection,
187        _ => EventType::Decision,
188    }
189}
190
191// Rejection ranks before Decision on a tie.
192fn rank(t: EventType) -> u8 {
193    match t {
194        EventType::Rejection => 0,
195        _ => 1,
196    }
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db;
    use crate::event::{Author, Event, EventStatus, EventType, Source};

    /// Build an agent-authored chat event with the given task, type, text
    /// and status.
    fn ev(task: &str, ty: EventType, text: &str, status: EventStatus) -> Event {
        let mut event = Event::new(task, ty, Author::Agent, Source::Chat, text.into());
        event.status = status;
        event
    }

    /// Shorthand for an event whose status is `Confirmed`.
    fn confirmed(task: &str, ty: EventType, text: &str) -> Event {
        ev(task, ty, text, EventStatus::Confirmed)
    }

    /// Open a database in a fresh temp dir and index every event through
    /// `db::index_event`. The `TempDir` is returned so the caller keeps the
    /// directory alive for as long as the connection is in use.
    fn seeded(events: &[Event]) -> (tempfile::TempDir, rusqlite::Connection) {
        let dir = tempfile::TempDir::new().unwrap();
        let conn = db::open(dir.path().join("s.sqlite")).unwrap();
        events.iter().for_each(|event| {
            db::index_event(&conn, event).unwrap();
        });
        (dir, conn)
    }

    #[test]
    fn returns_matching_confirmed_rejection() {
        let (_dir, conn) = seeded(&[confirmed(
            "tj-1",
            EventType::Rejection,
            "Tried switching the server to axum but it broke rmcp stdio.",
        )]);

        let hits = relevant_recall(&conn, "let's switch to axum", DEFAULT_MAX_HITS).unwrap();

        assert_eq!(hits.len(), 1);
        let hit = &hits[0];
        assert_eq!(hit.event_type, EventType::Rejection);
        assert!(hit.text.contains("axum"));
    }

    #[test]
    fn ignores_suggested_and_wrong_type() {
        // A rejection that is only suggested, and a confirmed event of a
        // type that recall does not cover: neither may surface.
        let fixtures = [
            ev(
                "tj-1",
                EventType::Rejection,
                "Rejected the axum migration tentatively.",
                EventStatus::Suggested,
            ),
            confirmed(
                "tj-1",
                EventType::Finding,
                "The axum server starts fine in isolation.",
            ),
        ];
        let (_dir, conn) = seeded(&fixtures);

        let hits = relevant_recall(&conn, "axum", DEFAULT_MAX_HITS).unwrap();
        assert!(hits.is_empty(), "got: {hits:?}");
    }

    #[test]
    fn caps_at_max_hits() {
        let mut events: Vec<Event> = Vec::new();
        for i in 0..5 {
            events.push(confirmed(
                "tj-1",
                EventType::Rejection,
                &format!("Rejected widget approach number {i} for the dashboard"),
            ));
        }
        let (_dir, conn) = seeded(&events);

        let hits = relevant_recall(&conn, "dashboard widget", 2).unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn rejection_wins_tie_over_decision() {
        // The decision is indexed first so insertion order cannot explain
        // the rejection coming out on top.
        let fixtures = [
            confirmed(
                "tj-1",
                EventType::Decision,
                "Decided to use the postgres connector.",
            ),
            confirmed(
                "tj-2",
                EventType::Rejection,
                "Rejected the postgres connector for latency.",
            ),
        ];
        let (_dir, conn) = seeded(&fixtures);

        let hits = relevant_recall(&conn, "postgres connector", DEFAULT_MAX_HITS).unwrap();

        // Both share the same text-match score (1.0), so the type decides.
        let kinds: Vec<EventType> = hits.iter().map(|hit| hit.event_type).collect();
        assert_eq!(kinds, vec![EventType::Rejection, EventType::Decision]);
    }

    #[test]
    fn below_threshold_returns_empty() {
        // Nothing in the query overlaps the stored text or artifacts, so the
        // event never reaches the threshold.
        let (_dir, conn) = seeded(&[confirmed(
            "tj-1",
            EventType::Rejection,
            "Rejected the kafka pipeline for cost reasons.",
        )]);

        let hits = relevant_recall(&conn, "frontend styling refactor", DEFAULT_MAX_HITS).unwrap();
        assert!(hits.is_empty(), "got: {hits:?}");
    }

    #[test]
    fn empty_query_returns_empty() {
        let (_dir, conn) = seeded(&[confirmed(
            "tj-1",
            EventType::Rejection,
            "Rejected axum.",
        )]);

        for blank in ["", "   "] {
            let hits = relevant_recall(&conn, blank, DEFAULT_MAX_HITS).unwrap();
            assert!(hits.is_empty());
        }
    }
}