pub mod backend;
pub mod claude_cli;
pub mod merge;
pub mod prompts;
use crate::config::Config;
use crate::db;
use crate::errors::CoreError;
use crate::ingest::{context, session};
use crate::models::{AnalysisResponse, AnalyzeResult, BatchDetail};
use crate::scrub;
use chrono::{Duration, Utc};
use rusqlite::Connection;
use std::path::Path;
use backend::AnalysisBackend;
use claude_cli::ClaudeCliBackend;
/// Maximum number of parsed sessions placed in a single analysis prompt.
///
/// `analyze` splits the sessions with `chunks(BATCH_SIZE)`, so the final
/// batch may contain fewer sessions than this.
pub const BATCH_SIZE: usize = 20;
/// JSON Schema handed to the backend together with every analysis prompt.
///
/// It describes an object with a `reasoning` string and a `patterns` array.
/// Each pattern entry carries an `action` of `"new"` or `"update"` (the only
/// required property) plus optional fields for either case; unknown
/// properties are rejected at both levels via `additionalProperties: false`.
pub const ANALYSIS_RESPONSE_SCHEMA: &str = r#"{
"type": "object",
"properties": {
"reasoning": {"type": "string"},
"patterns": {
"type": "array",
"items": {
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["new", "update"]},
"pattern_type": {"type": "string", "enum": ["repetitive_instruction", "recurring_mistake", "workflow_pattern", "stale_context", "redundant_context"]},
"description": {"type": "string"},
"confidence": {"type": "number"},
"source_sessions": {"type": "array", "items": {"type": "string"}},
"related_files": {"type": "array", "items": {"type": "string"}},
"suggested_content": {"type": "string"},
"suggested_target": {"type": "string", "enum": ["skill", "claude_md", "global_agent", "db_only"]},
"existing_id": {"type": "string"},
"new_sessions": {"type": "array", "items": {"type": "string"}},
"new_confidence": {"type": "number"}
},
"required": ["action"],
"additionalProperties": false
}
}
},
"required": ["reasoning", "patterns"],
"additionalProperties": false
}"#;
/// Analyzes recently ingested sessions for recurring patterns and persists the findings.
///
/// Steps, in order:
/// 1. Fetch candidate sessions via `db::get_sessions_for_analysis`, using a
///    cutoff of `window_days` days before the current UTC time.
/// 2. Re-parse each session file from disk (scrubbing secrets when
///    `config.privacy.scrub_secrets` is set) and drop sessions with fewer
///    than two user messages.
/// 3. Send the remaining sessions to the Claude CLI backend in chunks of
///    [`BATCH_SIZE`], inserting new patterns and applying merge updates after
///    every batch.
/// 4. Record every fetched session as analyzed, including ones that were
///    missing on disk, failed to parse, or were filtered out. This happens
///    only after all batches have succeeded.
///
/// `on_batch_start` is called before each backend request with
/// `(batch_index, total_batches, sessions_in_batch, prompt_len)`, where
/// `batch_index` is zero-based and `prompt_len` is `prompt.len()` (bytes).
///
/// # Errors
///
/// Returns [`CoreError::Analysis`] if the `claude` CLI is not available, if
/// `window_days` puts the cutoff outside the range chrono can represent, or
/// if a backend response cannot be parsed as JSON. Errors from database calls
/// and from the backend are propagated with `?`.
pub fn analyze<F>(
    conn: &Connection,
    config: &Config,
    project: Option<&str>,
    window_days: u32,
    on_batch_start: F,
) -> Result<AnalyzeResult, CoreError>
where
    F: Fn(usize, usize, usize, usize),
{
    if !ClaudeCliBackend::is_available() {
        return Err(CoreError::Analysis(
            "claude CLI not found on PATH. Install Claude Code CLI to use analysis.".to_string(),
        ));
    }

    // `Utc::now() - Duration` panics when the result falls outside chrono's
    // date range, which a large enough `window_days` triggers. Use the
    // checked subtraction and surface the problem as a regular error.
    let since = Utc::now()
        .checked_sub_signed(Duration::days(i64::from(window_days)))
        .ok_or_else(|| {
            CoreError::Analysis(format!(
                "analysis window of {window_days} days is out of range"
            ))
        })?;
    let rolling = config.analysis.rolling_window;
    let sessions_to_analyze = db::get_sessions_for_analysis(conn, project, &since, rolling)?;
    if sessions_to_analyze.is_empty() {
        return Ok(empty_analyze_result());
    }

    // Re-parse every session from disk. Missing or unparsable files are
    // reported on stderr and skipped rather than aborting the whole run.
    let mut parsed_sessions = Vec::new();
    for ingested in &sessions_to_analyze {
        let path = Path::new(&ingested.session_path);
        if !path.exists() {
            eprintln!(
                "warning: session file not found: {}",
                ingested.session_path
            );
            continue;
        }
        match session::parse_session_file(path, &ingested.session_id, &ingested.project) {
            Ok(mut s) => {
                if config.privacy.scrub_secrets {
                    scrub::scrub_session(&mut s);
                }
                parsed_sessions.push(s);
            }
            Err(e) => {
                eprintln!(
                    "warning: failed to re-parse session {}: {e}",
                    ingested.session_id
                );
            }
        }
    }

    // Sessions with fewer than two user messages are dropped before batching.
    let before_filter = parsed_sessions.len();
    parsed_sessions.retain(|s| s.user_messages.len() >= 2);
    let filtered_out = before_filter - parsed_sessions.len();
    if filtered_out > 0 {
        eprintln!(
            " Skipped {} single-message session{} (no pattern signal)",
            filtered_out,
            if filtered_out == 1 { "" } else { "s" }
        );
    }

    if parsed_sessions.is_empty() {
        // Nothing left to send to the backend; still mark the fetched
        // sessions as analyzed before returning the empty result.
        for ingested in &sessions_to_analyze {
            db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
        }
        return Ok(empty_analyze_result());
    }
    let analyzed_count = parsed_sessions.len();

    // The context summary is best-effort: a snapshot failure or an empty
    // summary simply means the prompt is built without it.
    let context_summary = match project {
        Some(project_path) => context::snapshot_context(config, project_path)
            .ok()
            .map(|s| prompts::build_context_summary(&s))
            .filter(|s| !s.is_empty()),
        None => None,
    };

    let backend = ClaudeCliBackend::new(&config.ai);
    let mut total_input_tokens: u64 = 0;
    let mut total_output_tokens: u64 = 0;
    let mut new_count = 0;
    let mut update_count = 0;
    let mut batch_details: Vec<BatchDetail> = Vec::new();
    // Ceiling division: a trailing partial chunk counts as a batch.
    let total_batches = (parsed_sessions.len() + BATCH_SIZE - 1) / BATCH_SIZE;

    for (batch_idx, batch) in parsed_sessions.chunks(BATCH_SIZE).enumerate() {
        // Re-read the stored patterns on every iteration so patterns inserted
        // by earlier batches are part of this batch's prompt and merge input.
        let existing = db::get_patterns(conn, &["discovered", "active"], project)?;
        let prompt = prompts::build_analysis_prompt(batch, &existing, context_summary.as_deref());
        let prompt_chars = prompt.len();
        on_batch_start(batch_idx, total_batches, batch.len(), prompt_chars);

        let response = backend.execute(&prompt, Some(ANALYSIS_RESPONSE_SCHEMA))?;
        total_input_tokens += response.input_tokens;
        total_output_tokens += response.output_tokens;

        // Attach size diagnostics to parse failures to help debugging.
        let analysis_resp = parse_analysis_response(&response.text).map_err(|e| {
            CoreError::Analysis(format!(
                "{e}\n(prompt_chars={}, output_tokens={}, result_chars={})",
                prompt_chars,
                response.output_tokens,
                response.text.len()
            ))
        })?;
        let reasoning = analysis_resp.reasoning;
        let (new_patterns, merge_updates) =
            merge::process_updates(analysis_resp.patterns, &existing, project);
        let batch_new = new_patterns.len();
        let batch_updated = merge_updates.len();

        for pattern in &new_patterns {
            db::insert_pattern(conn, pattern)?;
            new_count += 1;
        }
        for update in &merge_updates {
            db::update_pattern_merge(
                conn,
                &update.pattern_id,
                &update.new_sessions,
                update.new_confidence,
                Utc::now(),
                update.additional_times_seen,
            )?;
            update_count += 1;
        }

        let preview = truncate_for_error(&response.text, 500).to_string();
        batch_details.push(BatchDetail {
            batch_index: batch_idx,
            session_count: batch.len(),
            session_ids: batch.iter().map(|s| s.session_id.clone()).collect(),
            prompt_chars,
            input_tokens: response.input_tokens,
            output_tokens: response.output_tokens,
            new_patterns: batch_new,
            updated_patterns: batch_updated,
            reasoning,
            ai_response_preview: preview,
        });
    }

    // Mark every fetched session (not just the parsed ones) as analyzed.
    for ingested in &sessions_to_analyze {
        db::record_analyzed_session(conn, &ingested.session_id, &ingested.project)?;
    }

    let discovered = db::pattern_count_by_status(conn, "discovered")?;
    let active = db::pattern_count_by_status(conn, "active")?;
    Ok(AnalyzeResult {
        sessions_analyzed: analyzed_count,
        new_patterns: new_count,
        updated_patterns: update_count,
        total_patterns: (discovered + active) as usize,
        input_tokens: total_input_tokens,
        output_tokens: total_output_tokens,
        batch_details,
    })
}

/// Builds the all-zero result `analyze` returns when no session reaches the backend.
fn empty_analyze_result() -> AnalyzeResult {
    AnalyzeResult {
        sessions_analyzed: 0,
        new_patterns: 0,
        updated_patterns: 0,
        total_patterns: 0,
        input_tokens: 0,
        output_tokens: 0,
        batch_details: Vec::new(),
    }
}
fn parse_analysis_response(text: &str) -> Result<AnalysisResponse, CoreError> {
let trimmed = text.trim();
let response: AnalysisResponse = serde_json::from_str(trimmed).map_err(|e| {
CoreError::Analysis(format!(
"failed to parse AI response as JSON: {e}\nresponse text: {}",
truncate_for_error(text, 1500)
))
})?;
Ok(response)
}
/// Returns a prefix of `s` that is at most `max` bytes long.
///
/// When `max` lands inside a multi-byte character the cut moves back to the
/// nearest earlier character boundary, so the slice is always valid UTF-8.
/// Strings that already fit are returned unchanged.
fn truncate_for_error(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Offset 0 is always a character boundary, so the search cannot fail;
    // `unwrap_or(0)` only exists to avoid a panic path.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PatternUpdate;

    /// A response mixing a "new" and an "update" entry yields both variants, in order.
    #[test]
    fn test_parse_analysis_response_json() {
        let payload = r#"{
"reasoning": "Found recurring instruction across sessions.",
"patterns": [
{
"action": "new",
"pattern_type": "repetitive_instruction",
"description": "User always asks to use uv",
"confidence": 0.85,
"source_sessions": ["sess-1"],
"related_files": [],
"suggested_content": "Always use uv",
"suggested_target": "claude_md"
},
{
"action": "update",
"existing_id": "pat-123",
"new_sessions": ["sess-2"],
"new_confidence": 0.92
}
]
}"#;
        let parsed = parse_analysis_response(payload).unwrap();
        assert_eq!(
            parsed.reasoning,
            "Found recurring instruction across sessions."
        );
        assert_eq!(parsed.patterns.len(), 2);
        let first = &parsed.patterns[0];
        let second = &parsed.patterns[1];
        assert!(matches!(first, PatternUpdate::New(_)));
        assert!(matches!(second, PatternUpdate::Update(_)));
    }

    /// A `null` `suggested_content` deserializes to an empty string.
    #[test]
    fn test_parse_analysis_response_null_fields() {
        let payload = r#"{
"reasoning": "Observed a single pattern.",
"patterns": [
{
"action": "new",
"pattern_type": "repetitive_instruction",
"description": "Some pattern",
"confidence": 0.8,
"source_sessions": [],
"related_files": [],
"suggested_content": null,
"suggested_target": "claude_md"
}
]
}"#;
        let parsed = parse_analysis_response(payload).unwrap();
        assert_eq!(parsed.patterns.len(), 1);
        match &parsed.patterns[0] {
            PatternUpdate::New(pattern) => assert_eq!(pattern.suggested_content, ""),
            _ => panic!("expected New pattern"),
        }
    }

    /// An empty `patterns` array is accepted and the reasoning is preserved.
    #[test]
    fn test_parse_analysis_response_empty() {
        let payload = r#"{"reasoning": "No recurring patterns found.", "patterns": []}"#;
        let parsed = parse_analysis_response(payload).unwrap();
        assert_eq!(parsed.reasoning, "No recurring patterns found.");
        assert!(parsed.patterns.is_empty());
    }

    /// A missing `reasoning` key falls back to an empty string.
    #[test]
    fn test_parse_analysis_response_missing_reasoning_defaults_empty() {
        let payload = r#"{"patterns": []}"#;
        let parsed = parse_analysis_response(payload).unwrap();
        assert_eq!(parsed.reasoning, "");
        assert!(parsed.patterns.is_empty());
    }

    /// Plain prose without any JSON is rejected.
    #[test]
    fn test_parse_analysis_response_pure_prose_fails() {
        let prose = "I analyzed the sessions but found no recurring patterns worth reporting.";
        assert!(parse_analysis_response(prose).is_err());
    }

    /// The schema constant is well-formed JSON describing an object with a `patterns` property.
    #[test]
    fn test_analysis_response_schema_is_valid_json() {
        let schema: serde_json::Value = serde_json::from_str(ANALYSIS_RESPONSE_SCHEMA)
            .expect("ANALYSIS_RESPONSE_SCHEMA must be valid JSON");
        assert_eq!(schema["type"], "object");
        assert!(schema["properties"]["patterns"].is_object());
    }
}