1pub mod backend;
2pub mod claude_cli;
3pub mod merge;
4pub mod prompts;
5
6use crate::config::Config;
7use crate::db;
8use crate::errors::CoreError;
9use crate::ingest::{context, session};
10use crate::models::{AnalysisResponse, AnalyzeResult, BatchDetail};
11use crate::scrub;
12use chrono::{Duration, Utc};
13use rusqlite::Connection;
14use std::path::Path;
15
16use backend::AnalysisBackend;
17use claude_cli::ClaudeCliBackend;
18
/// Maximum number of parsed sessions included in a single backend prompt.
///
/// `analyze` splits the sessions it sends to the backend into chunks of this size and derives
/// the total batch count from it.
pub const BATCH_SIZE: usize = 20;
20
/// JSON Schema describing the response object expected from the analysis backend.
///
/// `analyze` passes this string as the second argument of every `backend.execute` call.
/// The top-level object must contain `reasoning` (string) and `patterns` (array); each pattern
/// entry must contain `action` (`"new"` or `"update"`), every other listed property is optional,
/// and unlisted properties are rejected at both levels.
pub const ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
 "type": "object",
 "properties": {
 "reasoning": {"type": "string"},
 "patterns": {
 "type": "array",
 "items": {
 "type": "object",
 "properties": {
 "action": {"type": "string", "enum": ["new", "update"]},
 "pattern_type": {"type": "string", "enum": ["repetitive_instruction", "recurring_mistake", "workflow_pattern", "stale_context", "redundant_context"]},
 "description": {"type": "string"},
 "confidence": {"type": "number"},
 "source_sessions": {"type": "array", "items": {"type": "string"}},
 "related_files": {"type": "array", "items": {"type": "string"}},
 "suggested_content": {"type": "string"},
 "suggested_target": {"type": "string", "enum": ["skill", "claude_md", "global_agent", "db_only"]},
 "existing_id": {"type": "string"},
 "new_sessions": {"type": "array", "items": {"type": "string"}},
 "new_confidence": {"type": "number"}
 },
 "required": ["action"],
 "additionalProperties": false
 }
 }
 },
 "required": ["reasoning", "patterns"],
 "additionalProperties": false
}"#;
53
54pub fn analyze<F>(
58 conn: &Connection,
59 config: &Config,
60 project: Option<&str>,
61 window_days: u32,
62 on_batch_start: F,
63) -> Result<AnalyzeResult, CoreError>
64where
65 F: Fn(usize, usize, usize, usize),
66{
67 if !ClaudeCliBackend::is_available() {
69 return Err(CoreError::Analysis(
70 "claude CLI not found on PATH. Install Claude Code CLI to use analysis.".to_string(),
71 ));
72 }
73 ClaudeCliBackend::check_auth()?;
78
79 let since = Utc::now() - Duration::days(window_days as i64);
80
81 let rolling = config.analysis.rolling_window;
84 let sessions_to_analyze = db::get_sessions_for_analysis(conn, project, &since, rolling)?;
85
86 if sessions_to_analyze.is_empty() {
87 return Ok(AnalyzeResult {
88 sessions_analyzed: 0,
89 new_patterns: 0,
90 updated_patterns: 0,
91 total_patterns: 0,
92 input_tokens: 0,
93 output_tokens: 0,
94 batch_details: Vec::new(),
95 });
96 }
97
98 let mut parsed_sessions = Vec::new();
100 for ingested in &sessions_to_analyze {
101 let path = Path::new(&ingested.session_path);
102 if !path.exists() {
103 eprintln!(
104 "warning: session file not found: {}",
105 ingested.session_path
106 );
107 continue;
108 }
109
110 match session::parse_session_file(path, &ingested.session_id, &ingested.project) {
111 Ok(mut s) => {
112 if config.privacy.scrub_secrets {
114 scrub::scrub_session(&mut s);
115 }
116 parsed_sessions.push(s);
117 }
118 Err(e) => {
119 eprintln!(
120 "warning: failed to re-parse session {}: {e}",
121 ingested.session_id
122 );
123 }
124 }
125 }
126
127 let before_filter = parsed_sessions.len();
131 parsed_sessions.retain(|s| s.user_messages.len() >= 2);
132 let filtered_out = before_filter - parsed_sessions.len();
133 if filtered_out > 0 {
134 eprintln!(
135 " Skipped {} single-message session{} (no pattern signal)",
136 filtered_out,
137 if filtered_out == 1 { "" } else { "s" }
138 );
139 }
140
141 let analyzed_count = parsed_sessions.len();
142
143 if parsed_sessions.is_empty() {
144 for ingested in &sessions_to_analyze {
146 db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
147 }
148 return Ok(AnalyzeResult {
149 sessions_analyzed: 0,
150 new_patterns: 0,
151 updated_patterns: 0,
152 total_patterns: 0,
153 input_tokens: 0,
154 output_tokens: 0,
155 batch_details: Vec::new(),
156 });
157 }
158
159 let context_summary = match project {
161 Some(project_path) => context::snapshot_context(config, project_path)
162 .ok()
163 .map(|s| prompts::build_context_summary(&s))
164 .filter(|s| !s.is_empty()),
165 None => None,
166 };
167
168 let backend = ClaudeCliBackend::new(&config.ai);
170
171 let mut total_input_tokens: u64 = 0;
172 let mut total_output_tokens: u64 = 0;
173 let mut new_count = 0;
174 let mut update_count = 0;
175 let mut batch_details: Vec<BatchDetail> = Vec::new();
176
177 let total_batches = (parsed_sessions.len() + BATCH_SIZE - 1) / BATCH_SIZE;
179
180 for (batch_idx, batch) in parsed_sessions.chunks(BATCH_SIZE).enumerate() {
181 let existing = db::get_patterns(conn, &["discovered", "active"], project)?;
183
184 let prompt = prompts::build_analysis_prompt(batch, &existing, context_summary.as_deref());
186 let prompt_chars = prompt.len();
187
188 on_batch_start(batch_idx, total_batches, batch.len(), prompt_chars);
189
190 let response = backend.execute(&prompt, Some(ANALYSIS_RESPONSE_SCHEMA))?;
192 total_input_tokens += response.input_tokens;
193 total_output_tokens += response.output_tokens;
194
195 let analysis_resp = parse_analysis_response(&response.text).map_err(|e| {
197 CoreError::Analysis(format!(
198 "{e}\n(prompt_chars={}, output_tokens={}, result_chars={})",
199 prompt_chars,
200 response.output_tokens,
201 response.text.len()
202 ))
203 })?;
204
205 let reasoning = analysis_resp.reasoning;
206
207 let (new_patterns, merge_updates) =
209 merge::process_updates(analysis_resp.patterns, &existing, project);
210
211 let batch_new = new_patterns.len();
212 let batch_updated = merge_updates.len();
213
214 for pattern in &new_patterns {
216 db::insert_pattern(conn, pattern)?;
217 new_count += 1;
218 }
219
220 for update in &merge_updates {
222 db::update_pattern_merge(
223 conn,
224 &update.pattern_id,
225 &update.new_sessions,
226 update.new_confidence,
227 Utc::now(),
228 update.additional_times_seen,
229 )?;
230 update_count += 1;
231 }
232
233 let preview = truncate_for_error(&response.text, 500).to_string();
235 batch_details.push(BatchDetail {
236 batch_index: batch_idx,
237 session_count: batch.len(),
238 session_ids: batch.iter().map(|s| s.session_id.clone()).collect(),
239 prompt_chars,
240 input_tokens: response.input_tokens,
241 output_tokens: response.output_tokens,
242 new_patterns: batch_new,
243 updated_patterns: batch_updated,
244 reasoning,
245 ai_response_preview: preview,
246 });
247 }
248
249 for ingested in &sessions_to_analyze {
251 db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
252 }
253
254 let discovered = db::pattern_count_by_status(conn, "discovered")?;
256 let active = db::pattern_count_by_status(conn, "active")?;
257
258 Ok(AnalyzeResult {
259 sessions_analyzed: analyzed_count,
260 new_patterns: new_count,
261 updated_patterns: update_count,
262 total_patterns: (discovered + active) as usize,
263 input_tokens: total_input_tokens,
264 output_tokens: total_output_tokens,
265 batch_details,
266 })
267}
268
269fn parse_analysis_response(text: &str) -> Result<AnalysisResponse, CoreError> {
272 let trimmed = text.trim();
273 let response: AnalysisResponse = serde_json::from_str(trimmed).map_err(|e| {
274 CoreError::Analysis(format!(
275 "failed to parse AI response as JSON: {e}\nresponse text: {}",
276 truncate_for_error(text, 1500)
277 ))
278 })?;
279 Ok(response)
280}
281
/// Returns the longest prefix of `s` that is at most `max` bytes long and ends on a UTF-8
/// character boundary.
///
/// Strings that already fit within `max` bytes are returned unchanged.
fn truncate_for_error(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Walk back from `max` to the nearest boundary; index 0 always qualifies, so the fallback
    // is never actually taken.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PatternUpdate;

    /// A response mixing a "new" and an "update" entry parses into the matching variants.
    #[test]
    fn test_parse_analysis_response_json() {
        let json = r#"{
            "reasoning": "Found recurring instruction across sessions.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "User always asks to use uv",
                    "confidence": 0.85,
                    "source_sessions": ["sess-1"],
                    "related_files": [],
                    "suggested_content": "Always use uv",
                    "suggested_target": "claude_md"
                },
                {
                    "action": "update",
                    "existing_id": "pat-123",
                    "new_sessions": ["sess-2"],
                    "new_confidence": 0.92
                }
            ]
        }"#;

        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "Found recurring instruction across sessions.");
        assert_eq!(resp.patterns.len(), 2);
        assert!(matches!(&resp.patterns[0], PatternUpdate::New(_)));
        assert!(matches!(&resp.patterns[1], PatternUpdate::Update(_)));
    }

    /// A `null` `suggested_content` is accepted and surfaces as an empty string.
    #[test]
    fn test_parse_analysis_response_null_fields() {
        let json = r#"{
            "reasoning": "Observed a single pattern.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "Some pattern",
                    "confidence": 0.8,
                    "source_sessions": [],
                    "related_files": [],
                    "suggested_content": null,
                    "suggested_target": "claude_md"
                }
            ]
        }"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.patterns.len(), 1);
        if let PatternUpdate::New(ref p) = resp.patterns[0] {
            assert_eq!(p.suggested_content, "");
        } else {
            panic!("expected New pattern");
        }
    }

    /// An empty pattern list is a valid response.
    #[test]
    fn test_parse_analysis_response_empty() {
        let json = r#"{"reasoning": "No recurring patterns found.", "patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "No recurring patterns found.");
        assert!(resp.patterns.is_empty());
    }

    /// A response without `reasoning` still parses, with the field defaulting to "".
    #[test]
    fn test_parse_analysis_response_missing_reasoning_defaults_empty() {
        let json = r#"{"patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "");
        assert!(resp.patterns.is_empty());
    }

    /// Plain prose instead of JSON is rejected.
    #[test]
    fn test_parse_analysis_response_pure_prose_fails() {
        let text = "I analyzed the sessions but found no recurring patterns worth reporting.";
        let result = parse_analysis_response(text);
        assert!(result.is_err());
    }

    /// The schema constant must itself be well-formed JSON with the expected top-level shape.
    #[test]
    fn test_analysis_response_schema_is_valid_json() {
        let value: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
            .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }

    /// Input that already fits within the limit is returned untouched.
    #[test]
    fn test_truncate_for_error_keeps_short_input() {
        assert_eq!(truncate_for_error("", 0), "");
        assert_eq!(truncate_for_error("hello", 5), "hello");
        assert_eq!(truncate_for_error("hello", 10), "hello");
    }

    /// ASCII input is cut at exactly `max` bytes.
    #[test]
    fn test_truncate_for_error_cuts_ascii_at_max() {
        assert_eq!(truncate_for_error("hello", 3), "hel");
        assert_eq!(truncate_for_error("hello", 0), "");
    }

    /// A limit that falls inside a multi-byte character backs off to the previous boundary
    /// instead of panicking on the slice.
    #[test]
    fn test_truncate_for_error_backs_off_to_char_boundary() {
        // "é" occupies bytes 1..3, so a limit of 2 lands in the middle of it.
        assert_eq!(truncate_for_error("héllo", 2), "h");
        assert_eq!(truncate_for_error("héllo", 3), "hé");
        // Each of these characters is three bytes long.
        assert_eq!(truncate_for_error("日本語", 4), "日");
        assert_eq!(truncate_for_error("日本語", 2), "");
    }
}