1pub mod backend;
2pub mod claude_cli;
3pub mod merge;
4pub mod prompts;
5
6use crate::config::Config;
7use crate::db;
8use crate::errors::CoreError;
9use crate::ingest::{context, session};
10use crate::models::{
11 AnalysisResponse, AnalyzeResult, BatchDetail, Pattern, PatternStatus, PatternType,
12 SuggestedTarget,
13};
14use crate::scrub;
15use chrono::{Duration, Utc};
16use rusqlite::Connection;
17use std::path::Path;
18
19use backend::AnalysisBackend;
20use claude_cli::ClaudeCliBackend;
21
/// Maximum number of parsed sessions placed in a single analysis prompt.
///
/// [`analyze`] splits the sessions into chunks of this size and issues one
/// backend request per chunk.
pub const BATCH_SIZE: usize = 20;
23
/// JSON Schema describing the analysis response.
///
/// [`analyze`] hands this string to the backend together with the prompt when
/// full CLAUDE.md management is disabled, and
/// [`full_management_analysis_schema`] uses it as the base document it extends.
///
/// The top-level object requires `reasoning` and `patterns`. Each entry of
/// `patterns` requires only `action` (`"new"` or `"update"`); the remaining
/// fields are optional at the schema level.
pub const ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
  "type": "object",
  "properties": {
    "reasoning": {"type": "string"},
    "patterns": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "action": {"type": "string", "enum": ["new", "update"]},
          "pattern_type": {"type": "string", "enum": ["repetitive_instruction", "recurring_mistake", "workflow_pattern", "stale_context", "redundant_context"]},
          "description": {"type": "string"},
          "confidence": {"type": "number"},
          "source_sessions": {"type": "array", "items": {"type": "string"}},
          "related_files": {"type": "array", "items": {"type": "string"}},
          "suggested_content": {"type": "string"},
          "suggested_target": {"type": "string", "enum": ["skill", "claude_md", "global_agent", "db_only"]},
          "existing_id": {"type": "string"},
          "new_sessions": {"type": "array", "items": {"type": "string"}},
          "new_confidence": {"type": "number"}
        },
        "required": ["action"],
        "additionalProperties": false
      }
    }
  },
  "required": ["reasoning", "patterns"],
  "additionalProperties": false
}"#;
56
57pub fn full_management_analysis_schema() -> String {
60 let mut schema: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
61 .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
62
63 let edits_schema: serde_json::Value = serde_json::json!({
64 "type": "array",
65 "items": {
66 "type": "object",
67 "properties": {
68 "edit_type": {"type": "string", "enum": ["add", "remove", "reword", "move"]},
69 "original_text": {"type": "string"},
70 "suggested_content": {"type": "string"},
71 "target_section": {"type": "string"},
72 "reasoning": {"type": "string"}
73 },
74 "required": ["edit_type", "reasoning"],
75 "additionalProperties": false
76 }
77 });
78
79 schema["properties"]["claude_md_edits"] = edits_schema;
80
81 serde_json::to_string_pretty(&schema).expect("schema serialization cannot fail")
82}
83
84pub fn analyze<F>(
88 conn: &Connection,
89 config: &Config,
90 project: Option<&str>,
91 window_days: u32,
92 on_batch_start: F,
93) -> Result<AnalyzeResult, CoreError>
94where
95 F: Fn(usize, usize, usize, usize),
96{
97 if !ClaudeCliBackend::is_available() {
99 return Err(CoreError::Analysis(
100 "claude CLI not found on PATH. Install Claude Code CLI to use analysis.".to_string(),
101 ));
102 }
103 ClaudeCliBackend::check_auth()?;
108
109 let since = Utc::now() - Duration::days(window_days as i64);
110
111 let rolling = config.analysis.rolling_window;
114 let sessions_to_analyze = db::get_sessions_for_analysis(conn, project, &since, rolling)?;
115
116 if sessions_to_analyze.is_empty() {
117 return Ok(AnalyzeResult {
118 sessions_analyzed: 0,
119 new_patterns: 0,
120 updated_patterns: 0,
121 total_patterns: 0,
122 input_tokens: 0,
123 output_tokens: 0,
124 batch_details: Vec::new(),
125 });
126 }
127
128 let mut parsed_sessions = Vec::new();
130 for ingested in &sessions_to_analyze {
131 let path = Path::new(&ingested.session_path);
132 if !path.exists() {
133 eprintln!(
134 "warning: session file not found: {}",
135 ingested.session_path
136 );
137 continue;
138 }
139
140 match session::parse_session_file(path, &ingested.session_id, &ingested.project) {
141 Ok(mut s) => {
142 if config.privacy.scrub_secrets {
144 scrub::scrub_session(&mut s);
145 }
146 parsed_sessions.push(s);
147 }
148 Err(e) => {
149 eprintln!(
150 "warning: failed to re-parse session {}: {e}",
151 ingested.session_id
152 );
153 }
154 }
155 }
156
157 let before_filter = parsed_sessions.len();
161 parsed_sessions.retain(|s| s.user_messages.len() >= 2);
162 let filtered_out = before_filter - parsed_sessions.len();
163 if filtered_out > 0 {
164 eprintln!(
165 " Skipped {} single-message session{} (no pattern signal)",
166 filtered_out,
167 if filtered_out == 1 { "" } else { "s" }
168 );
169 }
170
171 let analyzed_count = parsed_sessions.len();
172
173 if parsed_sessions.is_empty() {
174 for ingested in &sessions_to_analyze {
176 db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
177 }
178 return Ok(AnalyzeResult {
179 sessions_analyzed: 0,
180 new_patterns: 0,
181 updated_patterns: 0,
182 total_patterns: 0,
183 input_tokens: 0,
184 output_tokens: 0,
185 batch_details: Vec::new(),
186 });
187 }
188
189 let context_summary = match project {
191 Some(project_path) => context::snapshot_context(config, project_path)
192 .ok()
193 .map(|s| prompts::build_context_summary(&s))
194 .filter(|s| !s.is_empty()),
195 None => None,
196 };
197
198 let backend = ClaudeCliBackend::new(&config.ai);
200
201 let mut total_input_tokens: u64 = 0;
202 let mut total_output_tokens: u64 = 0;
203 let mut new_count = 0;
204 let mut update_count = 0;
205 let mut batch_details: Vec<BatchDetail> = Vec::new();
206
207 let total_batches = (parsed_sessions.len() + BATCH_SIZE - 1) / BATCH_SIZE;
209
210 for (batch_idx, batch) in parsed_sessions.chunks(BATCH_SIZE).enumerate() {
211 let existing = db::get_patterns(conn, &["discovered", "active"], project)?;
213
214 let full_mgmt = config.claude_md.full_management;
216 let prompt = prompts::build_analysis_prompt(batch, &existing, context_summary.as_deref(), full_mgmt);
217 let prompt_chars = prompt.len();
218
219 on_batch_start(batch_idx, total_batches, batch.len(), prompt_chars);
220
221 let schema_string;
223 let schema: &str = if full_mgmt {
224 schema_string = full_management_analysis_schema();
225 &schema_string
226 } else {
227 ANALYSIS_RESPONSE_SCHEMA
228 };
229
230 let response = backend.execute(&prompt, Some(schema))?;
232 total_input_tokens += response.input_tokens;
233 total_output_tokens += response.output_tokens;
234
235 let analysis_resp = parse_analysis_response(&response.text).map_err(|e| {
237 CoreError::Analysis(format!(
238 "{e}\n(prompt_chars={}, output_tokens={}, result_chars={})",
239 prompt_chars,
240 response.output_tokens,
241 response.text.len()
242 ))
243 })?;
244
245 let reasoning = analysis_resp.reasoning;
246 let claude_md_edits = analysis_resp.claude_md_edits;
247
248 let (new_patterns, merge_updates) =
250 merge::process_updates(analysis_resp.patterns, &existing, project);
251
252 let mut batch_new = new_patterns.len();
253 let batch_updated = merge_updates.len();
254
255 for pattern in &new_patterns {
257 db::insert_pattern(conn, pattern)?;
258 new_count += 1;
259 }
260
261 for update in &merge_updates {
263 db::update_pattern_merge(
264 conn,
265 &update.pattern_id,
266 &update.new_sessions,
267 update.new_confidence,
268 Utc::now(),
269 update.additional_times_seen,
270 )?;
271 update_count += 1;
272 }
273
274 for edit in &claude_md_edits {
276 let edit_json = serde_json::json!({
277 "edit_type": edit.edit_type.to_string(),
278 "original": edit.original_text,
279 "replacement": edit.suggested_content,
280 "target_section": edit.target_section,
281 "reasoning": edit.reasoning,
282 });
283
284 let description = format!(
285 "[edit:{}] {}",
286 edit.edit_type,
287 edit.original_text
288 );
289
290 let now = Utc::now();
291 let pattern = Pattern {
292 id: uuid::Uuid::new_v4().to_string(),
293 pattern_type: PatternType::RedundantContext,
294 description,
295 confidence: 0.75,
296 times_seen: 1,
297 first_seen: now,
298 last_seen: now,
299 last_projected: None,
300 status: PatternStatus::Discovered,
301 source_sessions: batch.iter().map(|s| s.session_id.clone()).collect(),
302 related_files: Vec::new(),
303 suggested_content: edit_json.to_string(),
304 suggested_target: SuggestedTarget::ClaudeMd,
305 project: project.map(String::from),
306 generation_failed: false,
307 };
308
309 db::insert_pattern(conn, &pattern)?;
310 new_count += 1;
311 batch_new += 1;
312 }
313
314 let preview = truncate_for_error(&response.text, 500).to_string();
316 batch_details.push(BatchDetail {
317 batch_index: batch_idx,
318 session_count: batch.len(),
319 session_ids: batch.iter().map(|s| s.session_id.clone()).collect(),
320 prompt_chars,
321 input_tokens: response.input_tokens,
322 output_tokens: response.output_tokens,
323 new_patterns: batch_new,
324 updated_patterns: batch_updated,
325 reasoning,
326 ai_response_preview: preview,
327 });
328 }
329
330 for ingested in &sessions_to_analyze {
332 db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
333 }
334
335 let discovered = db::pattern_count_by_status(conn, "discovered")?;
337 let active = db::pattern_count_by_status(conn, "active")?;
338
339 Ok(AnalyzeResult {
340 sessions_analyzed: analyzed_count,
341 new_patterns: new_count,
342 updated_patterns: update_count,
343 total_patterns: (discovered + active) as usize,
344 input_tokens: total_input_tokens,
345 output_tokens: total_output_tokens,
346 batch_details,
347 })
348}
349
350fn parse_analysis_response(text: &str) -> Result<AnalysisResponse, CoreError> {
353 let trimmed = text.trim();
354 let response: AnalysisResponse = serde_json::from_str(trimmed).map_err(|e| {
355 CoreError::Analysis(format!(
356 "failed to parse AI response as JSON: {e}\nresponse text: {}",
357 truncate_for_error(text, 1500)
358 ))
359 })?;
360 Ok(response)
361}
362
/// Returns the longest prefix of `s` that is at most `max` bytes long and ends
/// on a UTF-8 character boundary.
///
/// `s` is returned unchanged when it already fits. When `max` falls inside a
/// multi-byte character, the cut moves back to the start of that character, so
/// the result may be shorter than `max` bytes.
fn truncate_for_error(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Walk back from `max` to the nearest boundary. Index 0 is always a
    // boundary, so the search always succeeds.
    let end = (0..=max)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    &s[..end]
}
374
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PatternUpdate;

    /// Parses the full-management schema back into a JSON value.
    fn full_schema_value() -> serde_json::Value {
        serde_json::from_str(&full_management_analysis_schema())
            .expect("full_management schema must be valid JSON")
    }

    /// Collects a JSON array of strings into a `Vec<&str>`; panics on any
    /// other shape.
    fn string_items(value: &serde_json::Value) -> Vec<&str> {
        value
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str().unwrap())
            .collect()
    }

    // A response mixing a "new" and an "update" entry parses into the
    // matching `PatternUpdate` variants.
    #[test]
    fn test_parse_analysis_response_json() {
        let json = r#"{
            "reasoning": "Found recurring instruction across sessions.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "User always asks to use uv",
                    "confidence": 0.85,
                    "source_sessions": ["sess-1"],
                    "related_files": [],
                    "suggested_content": "Always use uv",
                    "suggested_target": "claude_md"
                },
                {
                    "action": "update",
                    "existing_id": "pat-123",
                    "new_sessions": ["sess-2"],
                    "new_confidence": 0.92
                }
            ]
        }"#;

        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "Found recurring instruction across sessions.");
        assert_eq!(resp.patterns.len(), 2);
        assert!(matches!(&resp.patterns[0], PatternUpdate::New(_)));
        assert!(matches!(&resp.patterns[1], PatternUpdate::Update(_)));
    }

    // A `null` `suggested_content` deserializes to an empty string.
    #[test]
    fn test_parse_analysis_response_null_fields() {
        let json = r#"{
            "reasoning": "Observed a single pattern.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "Some pattern",
                    "confidence": 0.8,
                    "source_sessions": [],
                    "related_files": [],
                    "suggested_content": null,
                    "suggested_target": "claude_md"
                }
            ]
        }"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.patterns.len(), 1);
        if let PatternUpdate::New(p) = &resp.patterns[0] {
            assert_eq!(p.suggested_content, "");
        } else {
            panic!("expected New pattern");
        }
    }

    #[test]
    fn test_parse_analysis_response_empty() {
        let json = r#"{"reasoning": "No recurring patterns found.", "patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "No recurring patterns found.");
        assert!(resp.patterns.is_empty());
    }

    #[test]
    fn test_parse_analysis_response_missing_reasoning_defaults_empty() {
        let json = r#"{"patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "");
        assert!(resp.patterns.is_empty());
    }

    #[test]
    fn test_parse_analysis_response_pure_prose_fails() {
        let text = "I analyzed the sessions but found no recurring patterns worth reporting.";
        assert!(parse_analysis_response(text).is_err());
    }

    #[test]
    fn test_analysis_response_schema_is_valid_json() {
        let value: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
            .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }

    #[test]
    fn test_full_management_analysis_schema_is_valid_json() {
        let value = full_schema_value();
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }

    #[test]
    fn test_full_management_analysis_schema_contains_claude_md_edits() {
        let value = full_schema_value();

        let edits = &value["properties"]["claude_md_edits"];
        assert!(edits.is_object(), "claude_md_edits should be in properties");
        assert_eq!(edits["type"], "array");

        let items = &edits["items"];
        assert_eq!(items["type"], "object");

        let required = string_items(&items["required"]);
        assert!(required.contains(&"edit_type"));
        assert!(required.contains(&"reasoning"));

        let enum_values = string_items(&items["properties"]["edit_type"]["enum"]);
        for expected in ["add", "remove", "reword", "move"] {
            assert!(enum_values.contains(&expected));
        }

        assert_eq!(items["additionalProperties"], false);
    }

    #[test]
    fn test_full_management_schema_claude_md_edits_not_required() {
        let value = full_schema_value();

        let required = string_items(&value["required"]);
        assert!(
            !required.contains(&"claude_md_edits"),
            "claude_md_edits should NOT be in top-level required"
        );
        assert!(required.contains(&"reasoning"));
        assert!(required.contains(&"patterns"));
    }

    #[test]
    fn test_full_management_schema_preserves_base_patterns() {
        let base: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA).unwrap();
        let full = full_schema_value();

        assert_eq!(
            base["properties"]["patterns"],
            full["properties"]["patterns"],
            "patterns schema should be identical between base and full_management"
        );
        assert_eq!(
            base["properties"]["reasoning"],
            full["properties"]["reasoning"],
            "reasoning schema should be identical"
        );
    }
}