1use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::sync::Arc;
10
11use crate::models::SessionMetadata;
12
/// A sequence of tool names that was observed together, with usage counts.
///
/// As produced by `analyze_tool_chains`, the names in `sequence` are adjacent
/// entries of a session's `tool_usage` keys after sorting them, so the order
/// is lexicographic rather than the order in which tools were invoked.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolChain {
    /// Tool names making up the chain (two for a bigram, three for a trigram).
    pub sequence: Vec<String>,
    /// Number of times this sequence was recorded across all analyzed sessions.
    pub frequency: usize,
    /// Number of distinct session ids in which this sequence was recorded.
    pub sessions_count: usize,
}
23
/// Result of a tool-chain analysis: ranked chain lists plus a timestamp.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolChainAnalysis {
    /// Two-tool chains ranked by descending frequency; `analyze_tool_chains`
    /// keeps at most the first 10.
    pub top_bigrams: Vec<ToolChain>,
    /// Three-tool chains ranked by descending frequency; `analyze_tool_chains`
    /// keeps at most the first 10.
    pub top_trigrams: Vec<ToolChain>,
    /// Chains ranked by descending `frequency * sessions_count`;
    /// `analyze_tool_chains` derives this list from `top_bigrams`.
    pub most_expensive_chains: Vec<ToolChain>,
    /// UTC time at which this value was constructed.
    pub computed_at: DateTime<Utc>,
}
36
37impl ToolChainAnalysis {
38 pub fn empty() -> Self {
40 Self {
41 top_bigrams: Vec::new(),
42 top_trigrams: Vec::new(),
43 most_expensive_chains: Vec::new(),
44 computed_at: Utc::now(),
45 }
46 }
47}
48
49pub fn analyze_tool_chains(sessions: &[Arc<SessionMetadata>]) -> ToolChainAnalysis {
55 if sessions.is_empty() {
56 return ToolChainAnalysis::empty();
57 }
58
59 let mut bigrams: HashMap<Vec<String>, (usize, HashSet<String>)> = HashMap::new();
61 let mut trigrams: HashMap<Vec<String>, (usize, HashSet<String>)> = HashMap::new();
62
63 for session in sessions {
64 if session.tool_usage.is_empty() {
65 continue;
66 }
67
68 let mut tools: Vec<String> = session.tool_usage.keys().cloned().collect();
70 tools.sort();
71
72 let session_id = session.id.to_string();
73
74 for pair in tools.windows(2) {
76 let key = pair.to_vec();
77 let entry = bigrams.entry(key).or_insert_with(|| (0, HashSet::new()));
78 entry.0 += 1;
79 entry.1.insert(session_id.clone());
80 }
81
82 for triple in tools.windows(3) {
84 let key = triple.to_vec();
85 let entry = trigrams.entry(key).or_insert_with(|| (0, HashSet::new()));
86 entry.0 += 1;
87 entry.1.insert(session_id.clone());
88 }
89 }
90
91 let mut top_bigrams: Vec<ToolChain> = bigrams
92 .into_iter()
93 .map(|(seq, (freq, sess))| ToolChain {
94 sequence: seq,
95 frequency: freq,
96 sessions_count: sess.len(),
97 })
98 .collect();
99 top_bigrams.sort_by(|a, b| b.frequency.cmp(&a.frequency));
100 top_bigrams.truncate(10);
101
102 let mut top_trigrams: Vec<ToolChain> = trigrams
103 .into_iter()
104 .map(|(seq, (freq, sess))| ToolChain {
105 sequence: seq,
106 frequency: freq,
107 sessions_count: sess.len(),
108 })
109 .collect();
110 top_trigrams.sort_by(|a, b| b.frequency.cmp(&a.frequency));
111 top_trigrams.truncate(10);
112
113 let mut expensive_chains = top_bigrams.clone();
115 expensive_chains.sort_by(|a, b| {
116 let score_a = a.frequency * a.sessions_count;
117 let score_b = b.frequency * b.sessions_count;
118 score_b.cmp(&score_a)
119 });
120 expensive_chains.truncate(10);
121
122 ToolChainAnalysis {
123 top_bigrams,
124 top_trigrams,
125 most_expensive_chains: expensive_chains,
126 computed_at: Utc::now(),
127 }
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::path::PathBuf;

    use crate::models::session::{ProjectId, SessionId};

    /// Builds a session fixture with id `id` whose `tool_usage` contains the
    /// given `(tool name, count)` pairs.
    fn make_session(id: &str, tools: &[(&str, usize)]) -> Arc<SessionMetadata> {
        let path = PathBuf::from(format!("/tmp/{}.jsonl", id));
        let mut meta = SessionMetadata::from_path(path, ProjectId::from("test"));
        meta.id = SessionId::from(id);
        meta.tool_usage = tools
            .iter()
            .map(|&(name, count)| (name.to_string(), count))
            .collect::<HashMap<_, _>>();
        Arc::new(meta)
    }

    #[test]
    fn test_empty_sessions() {
        let analysis = analyze_tool_chains(&[]);
        assert!(analysis.top_bigrams.is_empty());
        assert!(analysis.top_trigrams.is_empty());
    }

    #[test]
    fn test_bigrams_extracted() {
        let sessions = vec![
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 5), ("Read", 1)]),
            make_session("s3", &[("Bash", 2), ("Read", 2), ("Grep", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);

        // In s3 the sorted names are Bash, Grep, Read, so Bash and Read are
        // not adjacent there; only s1 and s2 contribute this bigram.
        let chain = analysis
            .top_bigrams
            .iter()
            .find(|c| c.sequence == ["Bash", "Read"])
            .expect("Bash+Read bigram should exist");
        assert_eq!(chain.frequency, 2);
        assert_eq!(chain.sessions_count, 2);
    }

    #[test]
    fn test_trigrams_extracted() {
        let sessions = vec![
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 1), ("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);

        let chain = analysis
            .top_trigrams
            .iter()
            .find(|c| c.sequence == ["Bash", "Read", "Write"])
            .expect("Bash+Read+Write trigram should exist");
        assert_eq!(chain.frequency, 2);
    }

    #[test]
    fn test_no_tools_session_skipped() {
        let sessions = vec![
            make_session("s1", &[]),
            make_session("s2", &[("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);
        assert_eq!(analysis.top_bigrams.len(), 1);
    }
}