use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use crate::models::SessionMetadata;
/// A tool n-gram together with the statistics gathered for it by
/// `analyze_tool_chains`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolChain {
/// Tool names that make up the chain, in the order they appear in the n-gram.
pub sequence: Vec<String>,
/// Number of times this exact sequence was recorded across all analyzed sessions.
pub frequency: usize,
/// Number of distinct session ids in which the sequence was recorded.
pub sessions_count: usize,
}
/// Result of a tool-chain analysis run: ranked bigrams and trigrams plus the
/// time at which the result was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolChainAnalysis {
/// Two-tool chains, ordered by descending frequency.
pub top_bigrams: Vec<ToolChain>,
/// Three-tool chains, ordered by descending frequency.
pub top_trigrams: Vec<ToolChain>,
/// Chains ordered by descending `frequency * sessions_count`.
pub most_expensive_chains: Vec<ToolChain>,
/// UTC timestamp taken when this analysis value was constructed.
pub computed_at: DateTime<Utc>,
}
impl ToolChainAnalysis {
    /// Creates an analysis that contains no chains at all.
    ///
    /// Every ranked list is empty and `computed_at` is set to the current UTC
    /// time at the moment of the call.
    pub fn empty() -> Self {
        let computed_at = Utc::now();
        let (top_bigrams, top_trigrams, most_expensive_chains) = (vec![], vec![], vec![]);
        Self {
            top_bigrams,
            top_trigrams,
            most_expensive_chains,
            computed_at,
        }
    }
}
/// Maximum number of chains kept in each ranked list of a [`ToolChainAnalysis`].
const TOP_CHAINS_LIMIT: usize = 10;

/// Accumulator keyed by tool sequence: `(occurrence count, ids of sessions seen in)`.
type ChainCounts = HashMap<Vec<String>, (usize, HashSet<String>)>;

/// Records every window of `size` consecutive entries of `tools` into `counts`
/// on behalf of the session identified by `session_id`.
///
/// Produces nothing when `tools` holds fewer than `size` entries.
fn record_windows(counts: &mut ChainCounts, tools: &[String], size: usize, session_id: &str) {
    for window in tools.windows(size) {
        let (frequency, seen_in) = counts.entry(window.to_vec()).or_default();
        *frequency += 1;
        seen_in.insert(session_id.to_owned());
    }
}

/// Converts accumulated counts into at most [`TOP_CHAINS_LIMIT`] chains ordered
/// by descending frequency.
///
/// Ties are broken by ascending sequence. Without the tie-break the result
/// would inherit the arbitrary iteration order of the `HashMap`, making both
/// the ordering and the set of entries surviving truncation vary between runs.
fn rank_chains(counts: ChainCounts) -> Vec<ToolChain> {
    let mut chains: Vec<ToolChain> = counts
        .into_iter()
        .map(|(sequence, (frequency, sessions))| ToolChain {
            sequence,
            frequency,
            sessions_count: sessions.len(),
        })
        .collect();
    // Sequences are unique (they were map keys), so this is a total order and
    // an unstable sort still yields a deterministic result.
    chains.sort_unstable_by(|a, b| {
        b.frequency
            .cmp(&a.frequency)
            .then_with(|| a.sequence.cmp(&b.sequence))
    });
    chains.truncate(TOP_CHAINS_LIMIT);
    chains
}

/// Computes the most common tool bigrams and trigrams across `sessions`.
///
/// For every session with a non-empty `tool_usage`, the tool names (the keys
/// of `tool_usage`) are sorted alphabetically and each window of two and three
/// adjacent names is counted. Note that the resulting sequences therefore
/// reflect alphabetical adjacency of the tools a session used, not the order
/// in which the tools were invoked.
///
/// # Returns
///
/// A [`ToolChainAnalysis`] where:
/// - `top_bigrams` / `top_trigrams` hold at most [`TOP_CHAINS_LIMIT`] chains,
///   ordered by descending frequency with ties broken by ascending sequence;
/// - `most_expensive_chains` is the `top_bigrams` list re-ordered by descending
///   `frequency * sessions_count` (same tie-break);
/// - `computed_at` is the current UTC time.
///
/// An empty `sessions` slice yields [`ToolChainAnalysis::empty`].
pub fn analyze_tool_chains(sessions: &[Arc<SessionMetadata>]) -> ToolChainAnalysis {
    if sessions.is_empty() {
        return ToolChainAnalysis::empty();
    }

    let mut bigrams = ChainCounts::new();
    let mut trigrams = ChainCounts::new();

    for session in sessions {
        if session.tool_usage.is_empty() {
            continue;
        }

        // Sorting gives a canonical per-session ordering, so the same set of
        // tools always produces the same n-grams.
        let mut tools: Vec<String> = session.tool_usage.keys().cloned().collect();
        tools.sort_unstable();

        let session_id = session.id.to_string();
        record_windows(&mut bigrams, &tools, 2, &session_id);
        record_windows(&mut trigrams, &tools, 3, &session_id);
    }

    let top_bigrams = rank_chains(bigrams);
    let top_trigrams = rank_chains(trigrams);

    // Saturating multiplication keeps the score well-defined even for
    // pathological counts instead of overflowing.
    let score = |chain: &ToolChain| chain.frequency.saturating_mul(chain.sessions_count);
    let mut expensive_chains = top_bigrams.clone();
    expensive_chains.sort_unstable_by(|a, b| {
        score(b)
            .cmp(&score(a))
            .then_with(|| a.sequence.cmp(&b.sequence))
    });
    expensive_chains.truncate(TOP_CHAINS_LIMIT);

    ToolChainAnalysis {
        top_bigrams,
        top_trigrams,
        most_expensive_chains: expensive_chains,
        computed_at: Utc::now(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::session::{ProjectId, SessionId};
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Builds a shared `SessionMetadata` fixture with the given id and
    /// per-tool usage counts.
    fn make_session(id: &str, tools: &[(&str, usize)]) -> Arc<SessionMetadata> {
        let path = PathBuf::from(format!("/tmp/{}.jsonl", id));
        let mut meta = SessionMetadata::from_path(path, ProjectId::from("test"));
        meta.id = SessionId::from(id);
        meta.tool_usage = tools
            .iter()
            .map(|&(name, count)| (name.to_string(), count))
            .collect::<HashMap<_, _>>();
        Arc::new(meta)
    }

    /// No sessions at all must produce empty rankings.
    #[test]
    fn test_empty_sessions() {
        let analysis = analyze_tool_chains(&[]);
        assert!(analysis.top_bigrams.is_empty());
        assert!(analysis.top_trigrams.is_empty());
    }

    /// `Bash, Read` is adjacent (after sorting) in s1 and s2 only; in s3
    /// `Grep` sorts between them.
    #[test]
    fn test_bigrams_extracted() {
        let fixtures = vec![
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 5), ("Read", 1)]),
            make_session("s3", &[("Bash", 2), ("Read", 2), ("Grep", 1)]),
        ];

        let analysis = analyze_tool_chains(&fixtures);

        let chain = analysis
            .top_bigrams
            .iter()
            .find(|c| c.sequence == ["Bash", "Read"])
            .expect("Bash+Read bigram should exist");
        assert_eq!(chain.frequency, 2);
        assert_eq!(chain.sessions_count, 2);
    }

    /// Both sessions use exactly the same three tools, so the trigram is
    /// counted twice.
    #[test]
    fn test_trigrams_extracted() {
        let fixtures = vec![
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 1), ("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&fixtures);

        let chain = analysis
            .top_trigrams
            .iter()
            .find(|c| c.sequence == ["Bash", "Read", "Write"])
            .expect("Bash+Read+Write trigram should exist");
        assert_eq!(chain.frequency, 2);
    }

    /// A session without any tool usage contributes nothing to the result.
    #[test]
    fn test_no_tools_session_skipped() {
        let fixtures = vec![
            make_session("s1", &[]),
            make_session("s2", &[("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&fixtures);

        assert_eq!(analysis.top_bigrams.len(), 1);
    }
}