1pub mod backend;
2pub mod claude_cli;
3pub mod merge;
4pub mod prompts;
5
6use crate::config::Config;
7use crate::db;
8use crate::errors::CoreError;
9use crate::ingest::{context, session};
10use crate::models::{
11 AnalysisResponse, AnalyzeResult, BatchDetail, EdgeType, GraphAnalysisResponse, GraphOperation,
12 NodeScope, NodeType, Pattern, PatternStatus, PatternType, SuggestedTarget,
13};
14use crate::scrub;
15use chrono::{Duration, Utc};
16use rusqlite::Connection;
17use std::path::Path;
18
19use backend::AnalysisBackend;
20use claude_cli::ClaudeCliBackend;
21
/// Maximum number of parsed sessions sent to the analysis backend in a single
/// prompt; `analyze` splits its session list into chunks of this size.
pub const BATCH_SIZE: usize = 20;
23
/// JSON Schema for the pattern-analysis response.
///
/// `analyze` passes this schema to the backend when full CLAUDE.md management
/// is disabled, and `full_management_analysis_schema` uses it as the base it
/// extends. The top-level object requires `reasoning` and `patterns`; each
/// pattern item only requires `action` (`"new"` or `"update"`), with all other
/// fields optional at the schema level.
pub const ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
  "type": "object",
  "properties": {
    "reasoning": {"type": "string"},
    "patterns": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "action": {"type": "string", "enum": ["new", "update"]},
          "pattern_type": {"type": "string", "enum": ["repetitive_instruction", "recurring_mistake", "workflow_pattern", "stale_context", "redundant_context"]},
          "description": {"type": "string"},
          "confidence": {"type": "number"},
          "source_sessions": {"type": "array", "items": {"type": "string"}},
          "related_files": {"type": "array", "items": {"type": "string"}},
          "suggested_content": {"type": "string"},
          "suggested_target": {"type": "string", "enum": ["skill", "claude_md", "global_agent", "db_only"]},
          "existing_id": {"type": "string"},
          "new_sessions": {"type": "array", "items": {"type": "string"}},
          "new_confidence": {"type": "number"}
        },
        "required": ["action"],
        "additionalProperties": false
      }
    }
  },
  "required": ["reasoning", "patterns"],
  "additionalProperties": false
}"#;
56
/// JSON Schema for a graph-analysis response.
///
/// The top-level object requires `reasoning` and `operations`. Each operation
/// only requires `action` (`create_node`, `update_node`, `create_edge` or
/// `merge_nodes`); the remaining fields are optional at the schema level and
/// are interpreted per action by `parse_graph_response`.
///
/// NOTE(review): the call site that hands this schema to a backend is not in
/// this file section — confirm where it is used.
pub const GRAPH_ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
  "type": "object",
  "properties": {
    "reasoning": { "type": "string", "description": "1-2 sentence summary of what you observed" },
    "operations": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "action": { "type": "string", "enum": ["create_node", "update_node", "create_edge", "merge_nodes"] },
          "node_type": { "type": "string", "enum": ["preference", "pattern", "rule", "skill", "memory", "directive"] },
          "scope": { "type": "string", "enum": ["global", "project"] },
          "project_id": { "type": "string" },
          "content": { "type": "string" },
          "confidence": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
          "node_id": { "type": "string" },
          "new_confidence": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
          "new_content": { "type": "string" },
          "source_id": { "type": "string" },
          "target_id": { "type": "string" },
          "edge_type": { "type": "string", "enum": ["supports", "contradicts", "supersedes", "derived_from", "applies_to"] },
          "keep_id": { "type": "string" },
          "remove_id": { "type": "string" }
        },
        "required": ["action"],
        "additionalProperties": false
      }
    }
  },
  "required": ["reasoning", "operations"],
  "additionalProperties": false
}"#;
90
91pub fn full_management_analysis_schema() -> String {
94 let mut schema: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
95 .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
96
97 let edits_schema: serde_json::Value = serde_json::json!({
98 "type": "array",
99 "items": {
100 "type": "object",
101 "properties": {
102 "edit_type": {"type": "string", "enum": ["add", "remove", "reword", "move"]},
103 "original_text": {"type": "string"},
104 "suggested_content": {"type": "string"},
105 "target_section": {"type": "string"},
106 "reasoning": {"type": "string"}
107 },
108 "required": ["edit_type", "reasoning"],
109 "additionalProperties": false
110 }
111 });
112
113 schema["properties"]["claude_md_edits"] = edits_schema;
114
115 serde_json::to_string_pretty(&schema).expect("schema serialization cannot fail")
116}
117
/// Runs pattern analysis over recently ingested sessions and persists the results.
///
/// Steps, in order:
/// 1. Verify the Claude CLI is on `PATH` and authenticated.
/// 2. Load candidate sessions from the database for the last `window_days` days.
/// 3. Re-parse each session file from disk, optionally scrubbing secrets, and
///    drop sessions with fewer than two user messages.
/// 4. Send the remaining sessions to the backend in chunks of `BATCH_SIZE`,
///    inserting new patterns, applying merge updates, and storing any
///    `claude_md_edits` as additional patterns.
/// 5. Record every session from step 2 as analyzed and return aggregate counts.
///
/// # Parameters
///
/// * `conn` - database connection used for all reads and writes.
/// * `config` - supplies `analysis.rolling_window`, `privacy.scrub_secrets`,
///   `claude_md.full_management` and the `ai` backend settings.
/// * `project` - optional project filter; forwarded to the session and pattern
///   queries and stamped on patterns created from CLAUDE.md edits.
/// * `window_days` - how many days back from now to look for sessions.
/// * `on_batch_start` - called before each backend request with
///   `(batch_index, total_batches, sessions_in_batch, prompt_chars)`.
///
/// # Errors
///
/// Returns `CoreError::Analysis` when the CLI is missing or a backend response
/// cannot be parsed. Errors from the auth check, database calls and backend
/// execution are propagated with `?`.
///
/// NOTE(review): sessions are recorded as analyzed only after *all* batches
/// succeed, but patterns are inserted per batch. If a later batch fails, the
/// function returns early with earlier batches already persisted and no
/// session marked analyzed — confirm whether the caller wraps this in a
/// transaction or whether re-analysis on the next run is intended.
pub fn analyze<F>(
    conn: &Connection,
    config: &Config,
    project: Option<&str>,
    window_days: u32,
    on_batch_start: F,
) -> Result<AnalyzeResult, CoreError>
where
    F: Fn(usize, usize, usize, usize),
{
    // Fail fast before touching the database if the backend cannot run.
    if !ClaudeCliBackend::is_available() {
        return Err(CoreError::Analysis(
            "claude CLI not found on PATH. Install Claude Code CLI to use analysis.".to_string(),
        ));
    }
    ClaudeCliBackend::check_auth()?;

    // Lower bound of the analysis window.
    let since = Utc::now() - Duration::days(window_days as i64);

    let rolling = config.analysis.rolling_window;
    let sessions_to_analyze = db::get_sessions_for_analysis(conn, project, &since, rolling)?;

    // Nothing to do: return an all-zero result without recording anything.
    if sessions_to_analyze.is_empty() {
        return Ok(AnalyzeResult {
            sessions_analyzed: 0,
            new_patterns: 0,
            updated_patterns: 0,
            total_patterns: 0,
            input_tokens: 0,
            output_tokens: 0,
            batch_details: Vec::new(),
        });
    }

    // Re-parse each session from its file on disk. Missing or unparseable
    // files are reported on stderr and skipped rather than aborting the run.
    let mut parsed_sessions = Vec::new();
    for ingested in &sessions_to_analyze {
        let path = Path::new(&ingested.session_path);
        if !path.exists() {
            eprintln!(
                "warning: session file not found: {}",
                ingested.session_path
            );
            continue;
        }

        match session::parse_session_file(path, &ingested.session_id, &ingested.project) {
            Ok(mut s) => {
                // Scrub before the content can reach a prompt.
                if config.privacy.scrub_secrets {
                    scrub::scrub_session(&mut s);
                }
                parsed_sessions.push(s);
            }
            Err(e) => {
                eprintln!(
                    "warning: failed to re-parse session {}: {e}",
                    ingested.session_id
                );
            }
        }
    }

    // Keep only sessions with at least two user messages.
    let before_filter = parsed_sessions.len();
    parsed_sessions.retain(|s| s.user_messages.len() >= 2);
    let filtered_out = before_filter - parsed_sessions.len();
    if filtered_out > 0 {
        eprintln!(
            " Skipped {} single-message session{} (no pattern signal)",
            filtered_out,
            if filtered_out == 1 { "" } else { "s" }
        );
    }

    let analyzed_count = parsed_sessions.len();

    // Everything was skipped: still record the sessions as analyzed so they
    // are not picked up again, then return an all-zero result.
    if parsed_sessions.is_empty() {
        for ingested in &sessions_to_analyze {
            db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
        }
        return Ok(AnalyzeResult {
            sessions_analyzed: 0,
            new_patterns: 0,
            updated_patterns: 0,
            total_patterns: 0,
            input_tokens: 0,
            output_tokens: 0,
            batch_details: Vec::new(),
        });
    }

    // Optional project context for the prompt. A snapshot failure or an empty
    // summary both result in `None`; the error itself is discarded.
    let context_summary = match project {
        Some(project_path) => context::snapshot_context(config, project_path)
            .ok()
            .map(|s| prompts::build_context_summary(&s))
            .filter(|s| !s.is_empty()),
        None => None,
    };

    let backend = ClaudeCliBackend::new(&config.ai);

    // Running totals across all batches.
    let mut total_input_tokens: u64 = 0;
    let mut total_output_tokens: u64 = 0;
    let mut new_count = 0;
    let mut update_count = 0;
    let mut batch_details: Vec<BatchDetail> = Vec::new();

    // Ceiling division: number of chunks `chunks(BATCH_SIZE)` will yield.
    let total_batches = (parsed_sessions.len() + BATCH_SIZE - 1) / BATCH_SIZE;

    for (batch_idx, batch) in parsed_sessions.chunks(BATCH_SIZE).enumerate() {
        // Re-read existing patterns each batch so patterns inserted by earlier
        // batches are visible to this one.
        let existing = db::get_patterns(conn, &["discovered", "active"], project)?;

        let full_mgmt = config.claude_md.full_management;
        let prompt =
            prompts::build_analysis_prompt(batch, &existing, context_summary.as_deref(), full_mgmt);
        let prompt_chars = prompt.len();

        on_batch_start(batch_idx, total_batches, batch.len(), prompt_chars);

        // `schema_string` is declared outside the `if` so the borrowed `&str`
        // can outlive the branch that builds the extended schema.
        let schema_string;
        let schema: &str = if full_mgmt {
            schema_string = full_management_analysis_schema();
            &schema_string
        } else {
            ANALYSIS_RESPONSE_SCHEMA
        };

        let response = backend.execute(&prompt, Some(schema))?;
        total_input_tokens += response.input_tokens;
        total_output_tokens += response.output_tokens;

        // Append size diagnostics to parse failures to help spot truncated output.
        let analysis_resp = parse_analysis_response(&response.text).map_err(|e| {
            CoreError::Analysis(format!(
                "{e}\n(prompt_chars={}, output_tokens={}, result_chars={})",
                prompt_chars,
                response.output_tokens,
                response.text.len()
            ))
        })?;

        let reasoning = analysis_resp.reasoning;
        let claude_md_edits = analysis_resp.claude_md_edits;

        // Split the response into brand-new patterns and updates to existing ones.
        let (new_patterns, merge_updates) =
            merge::process_updates(analysis_resp.patterns, &existing, project);

        let mut batch_new = new_patterns.len();
        let batch_updated = merge_updates.len();

        for pattern in &new_patterns {
            db::insert_pattern(conn, pattern)?;
            new_count += 1;
        }

        for update in &merge_updates {
            db::update_pattern_merge(
                conn,
                &update.pattern_id,
                &update.new_sessions,
                update.new_confidence,
                Utc::now(),
                update.additional_times_seen,
            )?;
            update_count += 1;
        }

        // Each suggested CLAUDE.md edit is stored as its own pattern, with the
        // edit details serialized as JSON in `suggested_content`.
        for edit in &claude_md_edits {
            let edit_json = serde_json::json!({
                "edit_type": edit.edit_type.to_string(),
                "original": edit.original_text,
                "replacement": edit.suggested_content,
                "target_section": edit.target_section,
                "reasoning": edit.reasoning,
            });

            let description = format!(
                "[edit:{}] {}",
                edit.edit_type,
                edit.original_text
            );

            let now = Utc::now();
            let pattern = Pattern {
                id: uuid::Uuid::new_v4().to_string(),
                pattern_type: PatternType::RedundantContext,
                description,
                confidence: 0.75,
                times_seen: 1,
                first_seen: now,
                last_seen: now,
                last_projected: None,
                status: PatternStatus::Discovered,
                // Attributed to every session in the batch; the edit does not
                // carry its own session list.
                source_sessions: batch.iter().map(|s| s.session_id.clone()).collect(),
                related_files: Vec::new(),
                suggested_content: edit_json.to_string(),
                suggested_target: SuggestedTarget::ClaudeMd,
                project: project.map(String::from),
                generation_failed: false,
            };

            db::insert_pattern(conn, &pattern)?;
            new_count += 1;
            batch_new += 1;
        }

        // Keep only a bounded prefix of the raw response in the batch report.
        let preview = truncate_for_error(&response.text, 500).to_string();
        batch_details.push(BatchDetail {
            batch_index: batch_idx,
            session_count: batch.len(),
            session_ids: batch.iter().map(|s| s.session_id.clone()).collect(),
            prompt_chars,
            input_tokens: response.input_tokens,
            output_tokens: response.output_tokens,
            new_patterns: batch_new,
            updated_patterns: batch_updated,
            reasoning,
            ai_response_preview: preview,
        });
    }

    // Record every candidate session, including those skipped or that failed
    // to parse, not just the ones that were sent to the backend.
    for ingested in &sessions_to_analyze {
        db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
    }

    let discovered = db::pattern_count_by_status(conn, "discovered")?;
    let active = db::pattern_count_by_status(conn, "active")?;

    Ok(AnalyzeResult {
        sessions_analyzed: analyzed_count,
        new_patterns: new_count,
        updated_patterns: update_count,
        total_patterns: (discovered + active) as usize,
        input_tokens: total_input_tokens,
        output_tokens: total_output_tokens,
        batch_details,
    })
}
383
384fn parse_analysis_response(text: &str) -> Result<AnalysisResponse, CoreError> {
387 let trimmed = text.trim();
388 let response: AnalysisResponse = serde_json::from_str(trimmed).map_err(|e| {
389 CoreError::Analysis(format!(
390 "failed to parse AI response as JSON: {e}\nresponse text: {}",
391 truncate_for_error(text, 1500)
392 ))
393 })?;
394 Ok(response)
395}
396
397pub fn parse_graph_response(json: &str, default_project: Option<&str>) -> Result<Vec<GraphOperation>, CoreError> {
399 let response: GraphAnalysisResponse = serde_json::from_str(json)
400 .map_err(|e| CoreError::Parse(format!("failed to parse graph analysis response: {e}")))?;
401
402 let mut ops = Vec::new();
403 for op_resp in &response.operations {
404 match op_resp.action.as_str() {
405 "create_node" => {
406 let node_type = op_resp.node_type.as_deref()
407 .map(NodeType::from_str)
408 .unwrap_or(NodeType::Pattern);
409 let scope = op_resp.scope.as_deref()
410 .map(NodeScope::from_str)
411 .unwrap_or(NodeScope::Project);
412 let project_id = match scope {
413 NodeScope::Global => None,
414 NodeScope::Project => op_resp.project_id.clone()
415 .or_else(|| default_project.map(String::from)),
416 };
417 ops.push(GraphOperation::CreateNode {
418 node_type,
419 scope,
420 project_id,
421 content: op_resp.content.clone().unwrap_or_default(),
422 confidence: op_resp.confidence.unwrap_or(0.5),
423 });
424 }
425 "update_node" => {
426 if let Some(id) = &op_resp.node_id {
427 ops.push(GraphOperation::UpdateNode {
428 id: id.clone(),
429 confidence: op_resp.new_confidence,
430 content: op_resp.new_content.clone(),
431 });
432 }
433 }
434 "create_edge" => {
435 if let (Some(source), Some(target)) = (&op_resp.source_id, &op_resp.target_id) {
436 let edge_type = op_resp.edge_type.as_deref()
437 .and_then(EdgeType::from_str)
438 .unwrap_or(EdgeType::Supports);
439 ops.push(GraphOperation::CreateEdge {
440 source_id: source.clone(),
441 target_id: target.clone(),
442 edge_type,
443 });
444 }
445 }
446 "merge_nodes" => {
447 if let (Some(keep), Some(remove)) = (&op_resp.keep_id, &op_resp.remove_id) {
448 ops.push(GraphOperation::MergeNodes {
449 keep_id: keep.clone(),
450 remove_id: remove.clone(),
451 });
452 }
453 }
454 _ => {} }
456 }
457 Ok(ops)
458}
459
/// Returns at most the first `max` bytes of `s` without splitting a UTF-8
/// character.
///
/// If `s` already fits within `max` bytes it is returned unchanged. Otherwise
/// the cut point is moved backwards from `max` to the nearest character
/// boundary, so the result may be shorter than `max` bytes.
fn truncate_for_error(s: &str, max: usize) -> &str {
    if max >= s.len() {
        return s;
    }
    // Index 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
471
// Unit tests for response parsing and for the structure of the JSON schemas
// defined in this module. None of them touch the database or the backend.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PatternUpdate;

    // A response mixing a "new" and an "update" entry parses into the
    // corresponding `PatternUpdate` variants, in order.
    #[test]
    fn test_parse_analysis_response_json() {
        let json = r#"{
            "reasoning": "Found recurring instruction across sessions.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "User always asks to use uv",
                    "confidence": 0.85,
                    "source_sessions": ["sess-1"],
                    "related_files": [],
                    "suggested_content": "Always use uv",
                    "suggested_target": "claude_md"
                },
                {
                    "action": "update",
                    "existing_id": "pat-123",
                    "new_sessions": ["sess-2"],
                    "new_confidence": 0.92
                }
            ]
        }"#;

        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "Found recurring instruction across sessions.");
        assert_eq!(resp.patterns.len(), 2);
        assert!(matches!(&resp.patterns[0], PatternUpdate::New(_)));
        assert!(matches!(&resp.patterns[1], PatternUpdate::Update(_)));
    }

    // An explicit JSON `null` for `suggested_content` is accepted and
    // deserializes to an empty string.
    #[test]
    fn test_parse_analysis_response_null_fields() {
        let json = r#"{
            "reasoning": "Observed a single pattern.",
            "patterns": [
                {
                    "action": "new",
                    "pattern_type": "repetitive_instruction",
                    "description": "Some pattern",
                    "confidence": 0.8,
                    "source_sessions": [],
                    "related_files": [],
                    "suggested_content": null,
                    "suggested_target": "claude_md"
                }
            ]
        }"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.patterns.len(), 1);
        if let PatternUpdate::New(ref p) = resp.patterns[0] {
            assert_eq!(p.suggested_content, "");
        } else {
            panic!("expected New pattern");
        }
    }

    // An empty `patterns` array is a valid response.
    #[test]
    fn test_parse_analysis_response_empty() {
        let json = r#"{"reasoning": "No recurring patterns found.", "patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "No recurring patterns found.");
        assert!(resp.patterns.is_empty());
    }

    // The parser is more lenient than the schema: a missing `reasoning`
    // field defaults to an empty string instead of failing.
    #[test]
    fn test_parse_analysis_response_missing_reasoning_defaults_empty() {
        let json = r#"{"patterns": []}"#;
        let resp = parse_analysis_response(json).unwrap();
        assert_eq!(resp.reasoning, "");
        assert!(resp.patterns.is_empty());
    }

    // Non-JSON text is rejected; there is no attempt to extract JSON from prose.
    #[test]
    fn test_parse_analysis_response_pure_prose_fails() {
        let text = "I analyzed the sessions but found no recurring patterns worth reporting.";
        let result = parse_analysis_response(text);
        assert!(result.is_err());
    }

    // The base schema constant must itself be well-formed JSON.
    #[test]
    fn test_analysis_response_schema_is_valid_json() {
        let value: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
            .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }

    // The extended schema round-trips through serialization as valid JSON.
    #[test]
    fn test_full_management_analysis_schema_is_valid_json() {
        let schema_str = full_management_analysis_schema();
        let value: serde_json::Value =
            serde_json::from_str(&schema_str).expect("full_management schema must be valid JSON");
        assert_eq!(value["type"], "object");
        assert!(value["properties"]["patterns"].is_object());
    }

    // The extended schema exposes `claude_md_edits` as an array of strict
    // objects with the expected required fields and `edit_type` values.
    #[test]
    fn test_full_management_analysis_schema_contains_claude_md_edits() {
        let schema_str = full_management_analysis_schema();
        let value: serde_json::Value = serde_json::from_str(&schema_str).unwrap();

        let edits = &value["properties"]["claude_md_edits"];
        assert!(edits.is_object(), "claude_md_edits should be in properties");
        assert_eq!(edits["type"], "array");

        let items = &edits["items"];
        assert_eq!(items["type"], "object");
        let required: Vec<String> = items["required"]
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str().unwrap().to_string())
            .collect();
        assert!(required.contains(&"edit_type".to_string()));
        assert!(required.contains(&"reasoning".to_string()));

        let edit_type_enum = items["properties"]["edit_type"]["enum"]
            .as_array()
            .unwrap();
        let enum_values: Vec<&str> = edit_type_enum.iter().map(|v| v.as_str().unwrap()).collect();
        assert!(enum_values.contains(&"add"));
        assert!(enum_values.contains(&"remove"));
        assert!(enum_values.contains(&"reword"));
        assert!(enum_values.contains(&"move"));

        assert_eq!(items["additionalProperties"], false);
    }

    // `claude_md_edits` is optional: it must not appear in the top-level
    // `required` list, while the base requirements stay in place.
    #[test]
    fn test_full_management_schema_claude_md_edits_not_required() {
        let schema_str = full_management_analysis_schema();
        let value: serde_json::Value = serde_json::from_str(&schema_str).unwrap();

        let required: Vec<String> = value["required"]
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str().unwrap().to_string())
            .collect();
        assert!(
            !required.contains(&"claude_md_edits".to_string()),
            "claude_md_edits should NOT be in top-level required"
        );
        assert!(required.contains(&"reasoning".to_string()));
        assert!(required.contains(&"patterns".to_string()));
    }

    // Extending the schema must not alter the properties inherited from the base.
    #[test]
    fn test_full_management_schema_preserves_base_patterns() {
        let base: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA).unwrap();
        let full: serde_json::Value =
            serde_json::from_str(&full_management_analysis_schema()).unwrap();

        assert_eq!(
            base["properties"]["patterns"],
            full["properties"]["patterns"],
            "patterns schema should be identical between base and full_management"
        );
        assert_eq!(
            base["properties"]["reasoning"],
            full["properties"]["reasoning"],
            "reasoning schema should be identical"
        );
    }

    // The graph schema constant must be well-formed JSON.
    #[test]
    fn test_graph_analysis_schema_is_valid_json() {
        let _: serde_json::Value = serde_json::from_str(GRAPH_ANALYSIS_RESPONSE_SCHEMA)
            .expect("schema must be valid JSON");
    }

    // A `create_node` followed by an `update_node` yields two operations
    // carrying the fields supplied in the response.
    #[test]
    fn test_parse_graph_response() {
        let json = r#"{
            "reasoning": "Found testing pattern",
            "operations": [
                {
                    "action": "create_node",
                    "node_type": "rule",
                    "scope": "project",
                    "content": "Always run tests",
                    "confidence": 0.85
                },
                {
                    "action": "update_node",
                    "node_id": "existing-1",
                    "new_confidence": 0.9
                }
            ]
        }"#;
        let ops = parse_graph_response(json, Some("my-app")).unwrap();
        assert_eq!(ops.len(), 2);
        match &ops[0] {
            GraphOperation::CreateNode { content, scope, .. } => {
                assert_eq!(content, "Always run tests");
                assert_eq!(*scope, NodeScope::Project);
            }
            _ => panic!("Expected CreateNode"),
        }
        match &ops[1] {
            GraphOperation::UpdateNode { id, confidence, .. } => {
                assert_eq!(id, "existing-1");
                assert_eq!(*confidence, Some(0.9));
            }
            _ => panic!("Expected UpdateNode"),
        }
    }
}