// car_memgine/reflection.rs
1//! Self-reflection — learns from conversation patterns.
2//!
3//! After a session, analyzes conversation history for:
4//! 1. User corrections (repeated mistakes, rejected suggestions)
5//! 2. Friction points (repeated manual steps)
6//! 3. Disagreements (user overrides agent recommendation)
7//!
8//! Extracts these as anti-pattern/gotcha facts with high confidence.
9//! Inspired by metaswarm's /self-reflect workflow.
10
11use crate::graph::{FactMetadata, MemKind, MemNode, Provenance};
12use crate::trajectory::{Trajectory, TrajectoryOutcome};
13use car_ir::json_extract::extract_json_object;
14use chrono::Utc;
15use serde::{Deserialize, Serialize};
16
/// A reflection insight extracted from conversation analysis.
///
/// Produced by the heuristic pass (`heuristic_reflect`), trajectory analysis
/// (`reflect_from_trajectories`), or parsed from an inference response
/// (`parse_reflection_response`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReflectionInsight {
    /// What was learned.
    pub fact: String,
    /// Actionable recommendation.
    pub recommendation: String,
    /// Category: "correction", "anti_pattern", "gotcha", "preference", "friction".
    pub category: String,
    /// Confidence level: "high", "medium", or "low".
    pub confidence: String,
    /// Tags for retrieval.
    pub tags: Vec<String>,
    /// Raw references back to the source material (conversation turn keys,
    /// trajectory/event IDs). Preserved instead of compressed so a harness
    /// optimizer can replay the full trace rather than only the summary.
    /// `#[serde(default)]` makes a missing field deserialize to an empty vec.
    #[serde(default)]
    pub trace_refs: Vec<String>,
}
36
/// Report from a reflection pass.
///
/// Plain counters summarizing one pass; `Default` yields an all-zero report.
#[derive(Debug, Clone, Default)]
pub struct ReflectionReport {
    /// Insights categorized as user corrections.
    pub corrections_found: usize,
    /// Insights categorized as user preferences.
    pub preferences_found: usize,
    /// Insights categorized as friction points (user repeating themselves).
    pub friction_points_found: usize,
    /// Insights actually ingested after the pass.
    /// NOTE(review): populated by callers outside this file — confirm semantics there.
    pub insights_ingested: usize,
}
45
46// --- Correction detection keywords ---
47
/// Phrases signalling the user is correcting the agent.
///
/// All entries MUST be lowercase: `heuristic_reflect` tests them with
/// `contains` against the *lowercased* turn text, so any marker containing an
/// uppercase letter can never match. ("I said" / "I meant" were previously
/// capitalized and therefore dead markers.)
const CORRECTION_MARKERS: &[&str] = &[
    "no, ",
    "no not",
    "don't do",
    "stop doing",
    "that's wrong",
    "actually,",
    "instead,",
    "not that",
    "i said",
    "i meant",
    "please don't",
    "undo that",
    "revert",
    "that's not what",
    "wrong approach",
    "bad idea",
];
66
/// Phrases signalling a stated preference or positive/negative feedback.
/// Matched with `contains` against the lowercased turn text, so entries must
/// stay lowercase. The last five entries double as the "positive feedback"
/// subset checked inside `heuristic_reflect`.
const PREFERENCE_MARKERS: &[&str] = &[
    "i prefer",
    "always use",
    "never use",
    "from now on",
    "in the future",
    "remember that",
    "keep doing",
    "good job",
    "yes exactly",
    "perfect",
    "that's right",
];
80
/// Phrases suggesting the user is repeating themselves — a sign the agent
/// failed to retain something. Matched with `contains` against the lowercased
/// turn text, so entries must stay lowercase.
///
/// NOTE(review): these are raw substring matches, so very short entries can
/// false-positive — "again" also matches "against" and "bargain". Consider
/// word-boundary matching before tightening; confirm impact on recall first.
const FRICTION_MARKERS: &[&str] = &[
    "again",
    "like i said",
    "i already told you",
    "for the third time",
    "as i mentioned",
    "same as before",
    "we discussed this",
    "i keep having to",
];
91
92/// Analyze conversation nodes for correction/preference/friction patterns.
93/// Returns insights without needing inference.
94pub fn heuristic_reflect(conversations: &[&MemNode]) -> Vec<ReflectionInsight> {
95    let mut insights = Vec::new();
96
97    // Only look at user messages (not assistant)
98    let user_turns: Vec<&MemNode> = conversations
99        .iter()
100        .filter(|n| n.kind == MemKind::Conversation)
101        .filter(|n| {
102            let lower = n.value.to_lowercase();
103            lower.starts_with("user:") || n.key == "user"
104        })
105        .copied()
106        .collect();
107
108    for (i, turn) in user_turns.iter().enumerate() {
109        let lower = turn.value.to_lowercase();
110        let cur_ref = turn_ref(turn);
111        let prev_ref = if i > 0 {
112            Some(turn_ref(user_turns[i - 1]))
113        } else {
114            None
115        };
116
117        // Correction detection
118        if CORRECTION_MARKERS.iter().any(|m| lower.contains(m)) {
119            // The correction itself is the insight; look at what was being corrected
120            let context = if i > 0 {
121                format!("(following: {})", truncate(&user_turns[i - 1].value, 100))
122            } else {
123                String::new()
124            };
125            let mut refs = vec![cur_ref.clone()];
126            if let Some(r) = &prev_ref {
127                refs.push(r.clone());
128            }
129            insights.push(ReflectionInsight {
130                fact: format!(
131                    "User correction: {} {}",
132                    truncate(&turn.value, 200),
133                    context
134                ),
135                recommendation: extract_recommendation(&turn.value),
136                category: "correction".to_string(),
137                confidence: "high".to_string(),
138                tags: vec!["user_feedback".to_string(), "correction".to_string()],
139                trace_refs: refs,
140            });
141        }
142
143        // Preference detection
144        if PREFERENCE_MARKERS.iter().any(|m| lower.contains(m)) {
145            let is_positive = [
146                "good job",
147                "yes exactly",
148                "perfect",
149                "that's right",
150                "keep doing",
151            ]
152            .iter()
153            .any(|m| lower.contains(m));
154            insights.push(ReflectionInsight {
155                fact: format!("User preference: {}", truncate(&turn.value, 200)),
156                recommendation: if is_positive {
157                    "Continue this approach.".to_string()
158                } else {
159                    extract_recommendation(&turn.value)
160                },
161                category: "preference".to_string(),
162                confidence: "high".to_string(),
163                tags: vec!["user_feedback".to_string(), "preference".to_string()],
164                trace_refs: vec![cur_ref.clone()],
165            });
166        }
167
168        // Friction detection (repetition signals)
169        if FRICTION_MARKERS.iter().any(|m| lower.contains(m)) {
170            insights.push(ReflectionInsight {
171                fact: format!("Friction point: {}", truncate(&turn.value, 200)),
172                recommendation: "Automate or remember this to avoid user repetition.".to_string(),
173                category: "friction".to_string(),
174                confidence: "medium".to_string(),
175                tags: vec!["user_feedback".to_string(), "friction".to_string()],
176                trace_refs: vec![cur_ref.clone()],
177            });
178        }
179    }
180
181    insights
182}
183
184/// Stable identifier for a conversation turn MemNode.
185/// Prefers `fact_id` when set, otherwise composes from key + creation timestamp.
186fn turn_ref(node: &MemNode) -> String {
187    if let Some(fid) = &node.fact_id {
188        return format!("conv:{}", fid);
189    }
190    format!("conv:{}@{}", node.key, node.created_at.timestamp_millis())
191}
192
193/// Reflect on tool-execution trajectories. Emits insights pointing at raw
194/// TraceEvent references instead of text-summarized failures — the paper's
195/// finding is that compressed feedback loses the signal a harness optimizer
196/// needs.
197///
198/// For each failed trajectory this extracts:
199/// - one insight per failed tool invocation, with `trace_refs` pointing at
200///   `trajectory:{proposal_id}:event:{index}` so callers can replay the raw event.
201pub fn reflect_from_trajectories(trajs: &[Trajectory]) -> Vec<ReflectionInsight> {
202    let mut insights = Vec::new();
203    for t in trajs {
204        let failed = matches!(
205            t.outcome,
206            TrajectoryOutcome::Failed | TrajectoryOutcome::ReplanExhausted
207        );
208        if !failed {
209            continue;
210        }
211        for (idx, ev) in t.events.iter().enumerate() {
212            if ev.kind != "action_failed" {
213                continue;
214            }
215            let tool = ev.tool.clone().unwrap_or_else(|| "<unknown>".into());
216            let err = ev
217                .data
218                .get("error")
219                .and_then(|v| v.as_str())
220                .unwrap_or("")
221                .to_string();
222            insights.push(ReflectionInsight {
223                fact: if err.is_empty() {
224                    format!("Tool `{}` failed in proposal {}", tool, t.proposal_id)
225                } else {
226                    format!(
227                        "Tool `{}` failed in proposal {}: {}",
228                        tool,
229                        t.proposal_id,
230                        truncate(&err, 200)
231                    )
232                },
233                recommendation: format!(
234                    "Inspect raw trace (trajectory:{}:event:{}) before retrying `{}`.",
235                    t.proposal_id, idx, tool
236                ),
237                category: "anti_pattern".to_string(),
238                confidence: "high".to_string(),
239                tags: vec!["tool_failure".to_string(), format!("tool:{}", tool)],
240                trace_refs: vec![format!("trajectory:{}:event:{}", t.proposal_id, idx)],
241            });
242        }
243    }
244    insights
245}
246
247/// Build an inference prompt for deeper reflection.
248pub fn reflection_prompt(conversations: &[&MemNode]) -> String {
249    let turns: Vec<String> = conversations
250        .iter()
251        .filter(|n| n.kind == MemKind::Conversation)
252        .map(|n| n.value.clone())
253        .collect();
254
255    format!(
256        r#"Analyze this conversation session for learning opportunities.
257
258## Conversation
259{turns}
260
261Look for:
2621. **Corrections**: Where the user corrected or redirected the assistant
2632. **Anti-patterns**: Approaches that failed or were rejected
2643. **Preferences**: User-stated preferences about how to work
2654. **Friction**: Things the user had to repeat or explain multiple times
266
267For each finding, extract an actionable insight.
268
269Respond with ONLY a JSON object:
270```json
271{{
272  "insights": [
273    {{
274      "fact": "What was observed",
275      "recommendation": "What to do differently",
276      "category": "correction|anti_pattern|preference|friction",
277      "confidence": "high|medium|low",
278      "tags": ["tag1", "tag2"]
279    }}
280  ]
281}}
282```"#,
283        turns = turns.join("\n"),
284    )
285}
286
287/// Parse reflection inference response.
288pub fn parse_reflection_response(response: &str) -> Vec<ReflectionInsight> {
289    if let Some(json_str) = extract_json_object(response) {
290        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
291            if let Some(insights) = parsed.get("insights").and_then(|i| i.as_array()) {
292                return insights
293                    .iter()
294                    .filter_map(|i| {
295                        Some(ReflectionInsight {
296                            fact: i.get("fact")?.as_str()?.to_string(),
297                            recommendation: i.get("recommendation")?.as_str()?.to_string(),
298                            category: i
299                                .get("category")
300                                .and_then(|c| c.as_str())
301                                .unwrap_or("correction")
302                                .to_string(),
303                            confidence: i
304                                .get("confidence")
305                                .and_then(|c| c.as_str())
306                                .unwrap_or("medium")
307                                .to_string(),
308                            tags: i
309                                .get("tags")
310                                .and_then(|t| t.as_array())
311                                .map(|a| {
312                                    a.iter()
313                                        .filter_map(|v| v.as_str().map(String::from))
314                                        .collect()
315                                })
316                                .unwrap_or_default(),
317                            trace_refs: i
318                                .get("trace_refs")
319                                .and_then(|t| t.as_array())
320                                .map(|a| {
321                                    a.iter()
322                                        .filter_map(|v| v.as_str().map(String::from))
323                                        .collect()
324                                })
325                                .unwrap_or_default(),
326                        })
327                    })
328                    .collect();
329            }
330        }
331    }
332    Vec::new()
333}
334
335/// Build FactMetadata for a reflection insight.
336pub fn insight_metadata(insight: &ReflectionInsight) -> FactMetadata {
337    let now = Utc::now();
338    let mut provenance = vec![Provenance {
339        source: "reflection".to_string(),
340        reference: "conversation analysis".to_string(),
341        date: Some(now),
342    }];
343    // Preserve raw trace references as distinct provenance entries so a
344    // downstream optimizer can walk back to the original trace, not a summary.
345    for r in &insight.trace_refs {
346        provenance.push(Provenance {
347            source: "trace".to_string(),
348            reference: r.clone(),
349            date: Some(now),
350        });
351    }
352    FactMetadata {
353        confidence: insight.confidence.clone(),
354        provenance,
355        affected_files: Vec::new(),
356        tags: insight.tags.clone(),
357        category: insight.category.clone(),
358        usage_count: 0,
359        helpful_count: 0,
360        outdated_reports: 0,
361        tenant_id: None,
362    }
363}
364
/// Truncate `s` to at most `max` bytes without splitting a UTF-8 character.
///
/// Returns the whole slice when it already fits; otherwise cuts at the
/// largest char boundary <= `max`. Implemented with a stable boundary walk
/// instead of the nightly-only `str::floor_char_boundary`, so this compiles
/// on stable Rust with identical results.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Walk back from `max` to the nearest char boundary. A UTF-8 sequence is
    // at most 4 bytes, so this loops at most 3 times; index 0 is always a
    // boundary, so it terminates.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
373
/// Pull the actionable part of a correction out of `text`.
///
/// Scans for directive markers ("instead,", "please ", ...) and returns the
/// sentence starting at the first match, falling back to a generic line when
/// no marker is present.
///
/// Safety fixes vs. the previous version:
/// - ASCII-only lowercasing keeps byte offsets identical to `text`, so a
///   marker position found in `lower` is always a valid index into `text`
///   (full Unicode lowercasing can change byte lengths and made the slice
///   panic or misalign). All markers are ASCII, so matching is unaffected.
/// - The 150-byte cap is snapped back to a char boundary so multibyte text
///   can no longer be split mid-character (which panicked).
fn extract_recommendation(text: &str) -> String {
    let lower = text.to_ascii_lowercase();
    for marker in &["instead,", "actually,", "please ", "use ", "don't "] {
        if let Some(pos) = lower.find(marker) {
            let remainder = &text[pos..];
            // Stop at the first sentence end; otherwise cap at 150 bytes on a
            // char boundary.
            let end = remainder.find('.').unwrap_or_else(|| {
                let mut cap = remainder.len().min(150);
                while !remainder.is_char_boundary(cap) {
                    cap -= 1;
                }
                cap
            });
            return remainder[..end].trim().to_string();
        }
    }
    "Apply the user's correction.".to_string()
}
386
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::{ContentType, MemKind};

    /// Builds a minimal conversation-turn node. `value` is formatted as
    /// "speaker: text", which is what `heuristic_reflect` filters on.
    fn conv(speaker: &str, text: &str) -> MemNode {
        MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: speaker.to_string(),
            value: format!("{}: {}", speaker, text),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        }
    }

    // "No, " marker in the third turn should yield a correction insight.
    #[test]
    fn detects_corrections() {
        let turns = vec![
            conv("user", "Add a REST endpoint"),
            conv("assistant", "I'll add a GraphQL mutation"),
            conv("user", "No, not GraphQL. I said REST endpoint."),
        ];
        let refs: Vec<&MemNode> = turns.iter().collect();
        let insights = heuristic_reflect(&refs);
        assert!(!insights.is_empty());
        assert!(insights.iter().any(|i| i.category == "correction"));
    }

    // "i prefer" marker should yield a preference insight.
    #[test]
    fn detects_preferences() {
        let turns = vec![conv(
            "user",
            "I prefer using snake_case for all function names.",
        )];
        let refs: Vec<&MemNode> = turns.iter().collect();
        let insights = heuristic_reflect(&refs);
        assert!(insights.iter().any(|i| i.category == "preference"));
    }

    // "as i mentioned" repetition marker should yield a friction insight.
    #[test]
    fn detects_friction() {
        let turns = vec![conv("user", "As I mentioned, use PostgreSQL not SQLite.")];
        let refs: Vec<&MemNode> = turns.iter().collect();
        let insights = heuristic_reflect(&refs);
        assert!(insights.iter().any(|i| i.category == "friction"));
    }

    // Marker-free small talk must produce no insights at all.
    #[test]
    fn no_insights_from_normal_conversation() {
        let turns = vec![
            conv("user", "What's the weather like?"),
            conv("assistant", "I can't check the weather."),
        ];
        let refs: Vec<&MemNode> = turns.iter().collect();
        let insights = heuristic_reflect(&refs);
        assert!(insights.is_empty());
    }

    // Every heuristic insight must carry a "conv:" trace reference so the
    // raw turn can be replayed later.
    #[test]
    fn heuristic_attaches_trace_refs() {
        let turns = vec![conv(
            "user",
            "I prefer using snake_case for all function names.",
        )];
        let refs: Vec<&MemNode> = turns.iter().collect();
        let insights = heuristic_reflect(&refs);
        assert!(!insights.is_empty());
        assert!(
            insights.iter().all(|i| !i.trace_refs.is_empty()),
            "every insight must preserve at least one trace_ref"
        );
        assert!(insights[0].trace_refs[0].starts_with("conv:"));
    }

    // Only the "action_failed" event (index 1) should be reported, with an
    // exact trajectory:{id}:event:{idx} reference and the raw error text.
    #[test]
    fn reflect_from_failed_trajectories_preserves_event_refs() {
        use crate::{TraceEvent, Trajectory, TrajectoryOutcome};
        let traj = Trajectory {
            proposal_id: "p-42".into(),
            source: "test".into(),
            action_count: 2,
            events: vec![
                TraceEvent {
                    kind: "action_succeeded".into(),
                    action_id: Some("a1".into()),
                    tool: Some("search".into()),
                    data: serde_json::json!({}),
                    ..Default::default()
                },
                TraceEvent {
                    kind: "action_failed".into(),
                    action_id: Some("a2".into()),
                    tool: Some("write_file".into()),
                    data: serde_json::json!({"error": "permission denied"}),
                    ..Default::default()
                },
            ],
            outcome: TrajectoryOutcome::Failed,
            timestamp: Utc::now(),
            duration_ms: 120.0,
            replan_attempts: 0,
        };
        let insights = reflect_from_trajectories(&[traj]);
        assert_eq!(insights.len(), 1);
        assert_eq!(insights[0].category, "anti_pattern");
        assert_eq!(
            insights[0].trace_refs,
            vec!["trajectory:p-42:event:1".to_string()]
        );
        assert!(insights[0].fact.contains("permission denied"));
    }

    // Successful trajectories are skipped entirely.
    #[test]
    fn reflect_from_successful_trajectories_emits_nothing() {
        use crate::{Trajectory, TrajectoryOutcome};
        let traj = Trajectory {
            proposal_id: "p-ok".into(),
            source: "test".into(),
            action_count: 0,
            events: vec![],
            outcome: TrajectoryOutcome::Success,
            timestamp: Utc::now(),
            duration_ms: 10.0,
            replan_attempts: 0,
        };
        assert!(reflect_from_trajectories(&[traj]).is_empty());
    }

    // Metadata gets one "reflection" provenance entry plus one "trace" entry
    // per trace_ref, in order.
    #[test]
    fn insight_metadata_persists_trace_refs_as_provenance() {
        let insight = ReflectionInsight {
            fact: "x".into(),
            recommendation: "y".into(),
            category: "correction".into(),
            confidence: "high".into(),
            tags: vec![],
            trace_refs: vec!["conv:abc@123".into()],
        };
        let meta = insight_metadata(&insight);
        assert_eq!(meta.provenance.len(), 2);
        assert_eq!(meta.provenance[1].source, "trace");
        assert_eq!(meta.provenance[1].reference, "conv:abc@123");
    }

    // Fenced JSON in the model response still parses.
    #[test]
    fn parse_reflection_json() {
        let response = r#"```json
{"insights": [{"fact": "User prefers Rust", "recommendation": "Use Rust", "category": "preference", "confidence": "high", "tags": ["language"]}]}
```"#;
        let insights = parse_reflection_response(response);
        assert_eq!(insights.len(), 1);
        assert_eq!(insights[0].category, "preference");
    }

    #[test]
    fn truncate_ascii() {
        assert_eq!(truncate("hello world", 5), "hello");
    }

    #[test]
    fn truncate_no_op() {
        assert_eq!(truncate("hi", 10), "hi");
    }

    // Cutting mid-emoji must snap back to the previous char boundary.
    #[test]
    fn truncate_emoji() {
        let s = "\u{1F600}\u{1F601}\u{1F602}"; // 3 emoji = 12 bytes
        let result = truncate(s, 5);
        assert_eq!(result, "\u{1F600}");
    }

    #[test]
    fn truncate_multibyte_boundary() {
        let s = "caf\u{00E9}"; // "cafe" with accented e (2 bytes for e-acute)
        let result = truncate(s, 4);
        assert_eq!(result, "caf");
    }
}