Skip to main content

ccboard_core/analytics/
tool_chains.rs

1//! Tool chain analysis — bigram/trigram patterns across sessions
2//!
3//! Extracts recurring tool co-occurrence sequences from session metadata
4//! to identify common workflows and expensive tool patterns.
5
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::sync::Arc;
10
11use crate::models::SessionMetadata;
12
13/// A recurring sequence of tools used together
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct ToolChain {
16    /// Ordered tool names in the sequence
17    pub sequence: Vec<String>,
18    /// How many times this sequence occurred across sessions
19    pub frequency: usize,
20    /// Number of distinct sessions containing this sequence
21    pub sessions_count: usize,
22}
23
24/// Complete tool chain analysis results
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct ToolChainAnalysis {
27    /// Top 10 tool pairs by frequency
28    pub top_bigrams: Vec<ToolChain>,
29    /// Top 10 tool triples by frequency
30    pub top_trigrams: Vec<ToolChain>,
31    /// Top 10 chains by token cost (uses call frequency as proxy)
32    pub most_expensive_chains: Vec<ToolChain>,
33    /// Timestamp of computation
34    pub computed_at: DateTime<Utc>,
35}
36
37impl ToolChainAnalysis {
38    /// Create empty analysis (used when no sessions available)
39    pub fn empty() -> Self {
40        Self {
41            top_bigrams: Vec::new(),
42            top_trigrams: Vec::new(),
43            most_expensive_chains: Vec::new(),
44            computed_at: Utc::now(),
45        }
46    }
47}
48
49/// Analyze tool co-occurrence patterns across sessions
50///
51/// Uses sorted tool name lists as a proxy for ordering (tools within a session
52/// co-occur but JSONL doesn't preserve strict cross-message ordering at metadata level).
53/// Bigrams and trigrams represent tools that appear together in the same session.
54pub fn analyze_tool_chains(sessions: &[Arc<SessionMetadata>]) -> ToolChainAnalysis {
55    if sessions.is_empty() {
56        return ToolChainAnalysis::empty();
57    }
58
59    // bigrams: Vec<tool_name> -> (frequency, set of session IDs)
60    let mut bigrams: HashMap<Vec<String>, (usize, HashSet<String>)> = HashMap::new();
61    let mut trigrams: HashMap<Vec<String>, (usize, HashSet<String>)> = HashMap::new();
62
63    for session in sessions {
64        if session.tool_usage.is_empty() {
65            continue;
66        }
67
68        // Use sorted tool names to ensure deterministic ordering
69        let mut tools: Vec<String> = session.tool_usage.keys().cloned().collect();
70        tools.sort();
71
72        let session_id = session.id.to_string();
73
74        // Generate bigrams from sorted tool list
75        for pair in tools.windows(2) {
76            let key = pair.to_vec();
77            let entry = bigrams.entry(key).or_insert_with(|| (0, HashSet::new()));
78            entry.0 += 1;
79            entry.1.insert(session_id.clone());
80        }
81
82        // Generate trigrams from sorted tool list
83        for triple in tools.windows(3) {
84            let key = triple.to_vec();
85            let entry = trigrams.entry(key).or_insert_with(|| (0, HashSet::new()));
86            entry.0 += 1;
87            entry.1.insert(session_id.clone());
88        }
89    }
90
91    let mut top_bigrams: Vec<ToolChain> = bigrams
92        .into_iter()
93        .map(|(seq, (freq, sess))| ToolChain {
94            sequence: seq,
95            frequency: freq,
96            sessions_count: sess.len(),
97        })
98        .collect();
99    top_bigrams.sort_by(|a, b| b.frequency.cmp(&a.frequency));
100    top_bigrams.truncate(10);
101
102    let mut top_trigrams: Vec<ToolChain> = trigrams
103        .into_iter()
104        .map(|(seq, (freq, sess))| ToolChain {
105            sequence: seq,
106            frequency: freq,
107            sessions_count: sess.len(),
108        })
109        .collect();
110    top_trigrams.sort_by(|a, b| b.frequency.cmp(&a.frequency));
111    top_trigrams.truncate(10);
112
113    // Most expensive chains: rank bigrams by combined token usage
114    let mut expensive_chains = top_bigrams.clone();
115    expensive_chains.sort_by(|a, b| {
116        let score_a = a.frequency * a.sessions_count;
117        let score_b = b.frequency * b.sessions_count;
118        score_b.cmp(&score_a)
119    });
120    expensive_chains.truncate(10);
121
122    ToolChainAnalysis {
123        top_bigrams,
124        top_trigrams,
125        most_expensive_chains: expensive_chains,
126        computed_at: Utc::now(),
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::path::PathBuf;

    use crate::models::session::{ProjectId, SessionId};

    /// Build a session fixture with the given ID and per-tool call counts.
    fn make_session(id: &str, tools: &[(&str, usize)]) -> Arc<SessionMetadata> {
        let tool_usage: HashMap<String, usize> = tools
            .iter()
            .map(|(name, count)| (name.to_string(), *count))
            .collect();

        let path = PathBuf::from(format!("/tmp/{}.jsonl", id));
        let mut meta = SessionMetadata::from_path(path, ProjectId::from("test"));
        meta.id = SessionId::from(id);
        meta.tool_usage = tool_usage;
        Arc::new(meta)
    }

    /// Look up a chain by its exact tool sequence.
    fn find_chain<'a>(chains: &'a [ToolChain], expected: &[&str]) -> Option<&'a ToolChain> {
        chains.iter().find(|chain| chain.sequence == expected)
    }

    #[test]
    fn test_empty_sessions() {
        let analysis = analyze_tool_chains(&[]);
        assert!(analysis.top_bigrams.is_empty());
        assert!(analysis.top_trigrams.is_empty());
    }

    #[test]
    fn test_bigrams_extracted() {
        let sessions = [
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 5), ("Read", 1)]),
            make_session("s3", &[("Bash", 2), ("Read", 2), ("Grep", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);

        // s1 sorts to Bash,Read,Write and s2 to Bash,Read: both yield Bash+Read.
        // s3 sorts to Bash,Grep,Read, giving Bash+Grep and Grep+Read but not Bash+Read.
        let bash_read = find_chain(&analysis.top_bigrams, &["Bash", "Read"])
            .expect("Bash+Read bigram should exist");
        assert_eq!(bash_read.frequency, 2);
        assert_eq!(bash_read.sessions_count, 2);
    }

    #[test]
    fn test_trigrams_extracted() {
        let sessions = [
            make_session("s1", &[("Bash", 3), ("Read", 2), ("Write", 1)]),
            make_session("s2", &[("Bash", 1), ("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);

        // Both sessions sort to Bash,Read,Write, so the trigram is counted twice.
        let bash_read_write = find_chain(&analysis.top_trigrams, &["Bash", "Read", "Write"])
            .expect("Bash+Read+Write trigram should exist");
        assert_eq!(bash_read_write.frequency, 2);
    }

    #[test]
    fn test_no_tools_session_skipped() {
        let sessions = [
            make_session("s1", &[]),
            make_session("s2", &[("Read", 1), ("Write", 1)]),
        ];

        let analysis = analyze_tool_chains(&sessions);

        // s1 has no tools and is skipped; s2 alone yields the single Read+Write bigram.
        assert_eq!(analysis.top_bigrams.len(), 1);
    }
}