retro_core/analysis/
mod.rs

1pub mod backend;
2pub mod claude_cli;
3pub mod merge;
4pub mod prompts;
5
6use crate::config::Config;
7use crate::db;
8use crate::errors::CoreError;
9use crate::ingest::{context, session};
10use crate::models::{AnalysisResponse, AnalyzeResult, BatchDetail};
11use crate::scrub;
12use chrono::{Duration, Utc};
13use rusqlite::Connection;
14use std::path::Path;
15
16use backend::AnalysisBackend;
17use claude_cli::ClaudeCliBackend;
18
/// Maximum number of parsed sessions sent to the AI backend in a single prompt.
///
/// [`analyze`] splits the sessions it re-parsed into chunks of this size and issues
/// one backend call per chunk.
pub const BATCH_SIZE: usize = 20;
20
/// JSON schema for constrained decoding of analysis responses.
///
/// The top-level object requires a `reasoning` string and a `patterns` array.
/// Pattern items use a flat schema — serde's `#[serde(tag = "action")]` handles variant
/// discrimination, so every field except `action` is optional here.
/// `additionalProperties: false` is required by structured output.
pub const ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
  "type": "object",
  "properties": {
    "reasoning": {"type": "string"},
    "patterns": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "action": {"type": "string", "enum": ["new", "update"]},
          "pattern_type": {"type": "string", "enum": ["repetitive_instruction", "recurring_mistake", "workflow_pattern", "stale_context", "redundant_context"]},
          "description": {"type": "string"},
          "confidence": {"type": "number"},
          "source_sessions": {"type": "array", "items": {"type": "string"}},
          "related_files": {"type": "array", "items": {"type": "string"}},
          "suggested_content": {"type": "string"},
          "suggested_target": {"type": "string", "enum": ["skill", "claude_md", "global_agent", "db_only"]},
          "existing_id": {"type": "string"},
          "new_sessions": {"type": "array", "items": {"type": "string"}},
          "new_confidence": {"type": "number"}
        },
        "required": ["action"],
        "additionalProperties": false
      }
    }
  },
  "required": ["reasoning", "patterns"],
  "additionalProperties": false
}"#;
53
54/// Run analysis: re-parse sessions, scrub, call AI, merge patterns, store results.
55///
56/// `on_batch_start` is called before each AI call with (batch_index, total_batches, session_count, prompt_chars).
57pub fn analyze<F>(
58    conn: &Connection,
59    config: &Config,
60    project: Option<&str>,
61    window_days: u32,
62    on_batch_start: F,
63) -> Result<AnalyzeResult, CoreError>
64where
65    F: Fn(usize, usize, usize, usize),
66{
67    // Check claude CLI availability and auth
68    if !ClaudeCliBackend::is_available() {
69        return Err(CoreError::Analysis(
70            "claude CLI not found on PATH. Install Claude Code CLI to use analysis.".to_string(),
71        ));
72    }
73    // Pre-flight auth check: a minimal prompt without --json-schema returns immediately
74    // on auth failure. With --json-schema, auth errors cause an infinite StructuredOutput
75    // retry loop in the CLI (it keeps injecting "You MUST call StructuredOutput" but the
76    // auth error response is always plain text, never a tool call).
77    ClaudeCliBackend::check_auth()?;
78
79    let since = Utc::now() - Duration::days(window_days as i64);
80
81    // Get sessions to analyze — rolling_window=true re-analyzes all sessions in window,
82    // false only picks up sessions not yet analyzed.
83    let rolling = config.analysis.rolling_window;
84    let sessions_to_analyze = db::get_sessions_for_analysis(conn, project, &since, rolling)?;
85
86    if sessions_to_analyze.is_empty() {
87        return Ok(AnalyzeResult {
88            sessions_analyzed: 0,
89            new_patterns: 0,
90            updated_patterns: 0,
91            total_patterns: 0,
92            input_tokens: 0,
93            output_tokens: 0,
94            batch_details: Vec::new(),
95        });
96    }
97
98    // Re-parse session files from disk to get full content
99    let mut parsed_sessions = Vec::new();
100    for ingested in &sessions_to_analyze {
101        let path = Path::new(&ingested.session_path);
102        if !path.exists() {
103            eprintln!(
104                "warning: session file not found: {}",
105                ingested.session_path
106            );
107            continue;
108        }
109
110        match session::parse_session_file(path, &ingested.session_id, &ingested.project) {
111            Ok(mut s) => {
112                // Apply secret scrubbing if enabled
113                if config.privacy.scrub_secrets {
114                    scrub::scrub_session(&mut s);
115                }
116                parsed_sessions.push(s);
117            }
118            Err(e) => {
119                eprintln!(
120                    "warning: failed to re-parse session {}: {e}",
121                    ingested.session_id
122                );
123            }
124        }
125    }
126
127    // Filter out low-signal sessions: single-message sessions are typically
128    // programmatic `claude -p` calls (including retro's own analysis) or heavily
129    // compacted sessions — not real multi-turn conversations with discoverable patterns.
130    let before_filter = parsed_sessions.len();
131    parsed_sessions.retain(|s| s.user_messages.len() >= 2);
132    let filtered_out = before_filter - parsed_sessions.len();
133    if filtered_out > 0 {
134        eprintln!(
135            "  Skipped {} single-message session{} (no pattern signal)",
136            filtered_out,
137            if filtered_out == 1 { "" } else { "s" }
138        );
139    }
140
141    let analyzed_count = parsed_sessions.len();
142
143    if parsed_sessions.is_empty() {
144        // Still record all sessions as analyzed so we don't re-process low-signal ones
145        for ingested in &sessions_to_analyze {
146            db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
147        }
148        return Ok(AnalyzeResult {
149            sessions_analyzed: 0,
150            new_patterns: 0,
151            updated_patterns: 0,
152            total_patterns: 0,
153            input_tokens: 0,
154            output_tokens: 0,
155            batch_details: Vec::new(),
156        });
157    }
158
159    // Load context summary (best-effort — analysis proceeds without it)
160    let context_summary = match project {
161        Some(project_path) => context::snapshot_context(config, project_path)
162            .ok()
163            .map(|s| prompts::build_context_summary(&s))
164            .filter(|s| !s.is_empty()),
165        None => None,
166    };
167
168    // Create AI backend
169    let backend = ClaudeCliBackend::new(&config.ai);
170
171    let mut total_input_tokens: u64 = 0;
172    let mut total_output_tokens: u64 = 0;
173    let mut new_count = 0;
174    let mut update_count = 0;
175    let mut batch_details: Vec<BatchDetail> = Vec::new();
176
177    // Process in batches
178    let total_batches = (parsed_sessions.len() + BATCH_SIZE - 1) / BATCH_SIZE;
179
180    for (batch_idx, batch) in parsed_sessions.chunks(BATCH_SIZE).enumerate() {
181        // Reload existing patterns before each batch (picks up patterns from prior batches)
182        let existing = db::get_patterns(conn, &["discovered", "active"], project)?;
183
184        // Build prompt
185        let prompt = prompts::build_analysis_prompt(batch, &existing, context_summary.as_deref());
186        let prompt_chars = prompt.len();
187
188        on_batch_start(batch_idx, total_batches, batch.len(), prompt_chars);
189
190        // Call AI backend
191        let response = backend.execute(&prompt, Some(ANALYSIS_RESPONSE_SCHEMA))?;
192        total_input_tokens += response.input_tokens;
193        total_output_tokens += response.output_tokens;
194
195        // Parse AI response into AnalysisResponse (reasoning + pattern updates)
196        let analysis_resp = parse_analysis_response(&response.text).map_err(|e| {
197            CoreError::Analysis(format!(
198                "{e}\n(prompt_chars={}, output_tokens={}, result_chars={})",
199                prompt_chars,
200                response.output_tokens,
201                response.text.len()
202            ))
203        })?;
204
205        let reasoning = analysis_resp.reasoning;
206
207        // Apply merge logic
208        let (new_patterns, merge_updates) =
209            merge::process_updates(analysis_resp.patterns, &existing, project);
210
211        let batch_new = new_patterns.len();
212        let batch_updated = merge_updates.len();
213
214        // Store new patterns
215        for pattern in &new_patterns {
216            db::insert_pattern(conn, pattern)?;
217            new_count += 1;
218        }
219
220        // Apply merge updates
221        for update in &merge_updates {
222            db::update_pattern_merge(
223                conn,
224                &update.pattern_id,
225                &update.new_sessions,
226                update.new_confidence,
227                Utc::now(),
228                update.additional_times_seen,
229            )?;
230            update_count += 1;
231        }
232
233        // Collect per-batch diagnostics
234        let preview = truncate_for_error(&response.text, 500).to_string();
235        batch_details.push(BatchDetail {
236            batch_index: batch_idx,
237            session_count: batch.len(),
238            session_ids: batch.iter().map(|s| s.session_id.clone()).collect(),
239            prompt_chars,
240            input_tokens: response.input_tokens,
241            output_tokens: response.output_tokens,
242            new_patterns: batch_new,
243            updated_patterns: batch_updated,
244            reasoning,
245            ai_response_preview: preview,
246        });
247    }
248
249    // Record all sessions as analyzed
250    for ingested in &sessions_to_analyze {
251        db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
252    }
253
254    // Get total pattern count
255    let discovered = db::pattern_count_by_status(conn, "discovered")?;
256    let active = db::pattern_count_by_status(conn, "active")?;
257
258    Ok(AnalyzeResult {
259        sessions_analyzed: analyzed_count,
260        new_patterns: new_count,
261        updated_patterns: update_count,
262        total_patterns: (discovered + active) as usize,
263        input_tokens: total_input_tokens,
264        output_tokens: total_output_tokens,
265        batch_details,
266    })
267}
268
269/// Parse the AI response text into an AnalysisResponse (reasoning + pattern updates).
270/// With `--json-schema` constrained decoding, the response is guaranteed valid JSON.
271fn parse_analysis_response(text: &str) -> Result<AnalysisResponse, CoreError> {
272    let trimmed = text.trim();
273    let response: AnalysisResponse = serde_json::from_str(trimmed).map_err(|e| {
274        CoreError::Analysis(format!(
275            "failed to parse AI response as JSON: {e}\nresponse text: {}",
276            truncate_for_error(text, 1500)
277        ))
278    })?;
279    Ok(response)
280}
281
/// Return the longest prefix of `s` that is at most `max` bytes long and ends on a
/// UTF-8 character boundary.
///
/// `s` is returned unchanged when it already fits. Otherwise the cut point backs off
/// from `max` until it no longer splits a multi-byte character, so slicing never panics.
fn truncate_for_error(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Index 0 is always a character boundary, so the search always finds a cut point.
    let end = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..end]
}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PatternUpdate;

    /// A response with one "new" and one "update" item deserialises into the matching
    /// `PatternUpdate` variants, in order.
    #[test]
    fn test_parse_analysis_response_json() {
        let json = r#"{
            "reasoning": "Found recurring instruction across sessions.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "User always asks to use uv",
                    "confidence": 0.85,
                    "source_sessions": ["sess-1"],
                    "related_files": [],
                    "suggested_content": "Always use uv",
                    "suggested_target": "claude_md"
                },
                {
                    "action": "update",
                    "existing_id": "pat-123",
                    "new_sessions": ["sess-2"],
                    "new_confidence": 0.92
                }
            ]
        }"#;

        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "Found recurring instruction across sessions.");
        assert_eq!(resp.patterns.len(), 2);
        assert!(matches!(&resp.patterns[0], PatternUpdate::New(_)));
        assert!(matches!(&resp.patterns[1], PatternUpdate::Update(_)));
    }

    /// A JSON `null` for `suggested_content` is accepted and becomes an empty string.
    #[test]
    fn test_parse_analysis_response_null_fields() {
        let json = r#"{
            "reasoning": "Observed a single pattern.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "Some pattern",
                    "confidence": 0.8,
                    "source_sessions": [],
                    "related_files": [],
                    "suggested_content": null,
                    "suggested_target": "claude_md"
                }
            ]
        }"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.patterns.len(), 1);
        if let PatternUpdate::New(ref p) = resp.patterns[0] {
            assert_eq!(p.suggested_content, "");
        } else {
            panic!("expected New pattern");
        }
    }

    /// An empty `patterns` array is a valid response.
    #[test]
    fn test_parse_analysis_response_empty() {
        let json = r#"{"reasoning": "No recurring patterns found.", "patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "No recurring patterns found.");
        assert!(resp.patterns.is_empty());
    }

    /// A missing `reasoning` key falls back to an empty string instead of failing.
    #[test]
    fn test_parse_analysis_response_missing_reasoning_defaults_empty() {
        let json = r#"{"patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "");
        assert!(resp.patterns.is_empty());
    }

    /// Plain prose (no JSON at all) is rejected.
    #[test]
    fn test_parse_analysis_response_pure_prose_fails() {
        let text = "I analyzed the sessions but found no recurring patterns worth reporting.";
        let result = parse_analysis_response(text);
        assert!(result.is_err());
    }

    /// The schema constant itself must parse as JSON and describe an object with a
    /// `patterns` property.
    #[test]
    fn test_analysis_response_schema_is_valid_json() {
        let value: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
            .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }
}
retro_core/analysis/mod.rs

retro_core/analysis/
mod.rs