Skip to main content

kaizen/collect/tail/
copilot_cli.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Ingest GitHub Copilot CLI sessions from `~/.copilot/session-state/<id>/events.jsonl`.
3
4use crate::collect::model_from_json;
5use crate::collect::tail::dir_mtime_ms;
6use crate::core::cost::estimate_tail_event_cost_usd_e6;
7use crate::core::event::{Event, EventKind, EventSource, SessionRecord, SessionStatus};
8use anyhow::{Context, Result};
9use serde_json::Value;
10use std::path::{Path, PathBuf};
11
12const AGENT: &str = "copilot-cli";
13
/// Locate the Copilot CLI state directory.
///
/// Resolution order:
/// 1. `$COPILOT_HOME`, used verbatim;
/// 2. `$HOME/.copilot`;
/// 3. the relative path `.copilot` when neither variable is usable.
///
/// Variables are read with [`std::env::var_os`] so values that are not valid
/// Unicode are still honoured, and an empty value is treated like an unset one
/// instead of producing an empty path.
fn copilot_home() -> PathBuf {
    // An empty variable carries no location, so handle it as if it were unset.
    let non_empty = |name: &str| std::env::var_os(name).filter(|raw| !raw.is_empty());

    if let Some(explicit) = non_empty("COPILOT_HOME") {
        return PathBuf::from(explicit);
    }
    if let Some(home) = non_empty("HOME") {
        return PathBuf::from(home).join(".copilot");
    }
    PathBuf::from(".copilot")
}
23
/// Resolve `p` through [`std::fs::canonicalize`], returning `p` unchanged when
/// canonicalization fails.
fn canonical(p: &Path) -> PathBuf {
    match std::fs::canonicalize(p) {
        Ok(resolved) => resolved,
        Err(_) => p.to_path_buf(),
    }
}
27
28fn paths_equal(a: &Path, b: &Path) -> bool {
29    canonical(a) == canonical(b)
30}
31
32fn session_workspace_path(session_dir: &Path) -> Option<PathBuf> {
33    let wj = session_dir.join("workspace.json");
34    if let Ok(text) = std::fs::read_to_string(&wj)
35        && let Ok(v) = serde_json::from_str::<Value>(&text)
36    {
37        for key in ["workspaceFolder", "cwd", "workingDirectory", "folder"] {
38            if let Some(s) = v.get(key).and_then(|x| x.as_str()) {
39                let p = s.strip_prefix("file://").unwrap_or(s);
40                return Some(PathBuf::from(p));
41            }
42        }
43    }
44    let meta = session_dir.join("metadata.json");
45    if let Ok(text) = std::fs::read_to_string(&meta)
46        && let Ok(v) = serde_json::from_str::<Value>(&text)
47        && let Some(s) = v
48            .get("workspaceFolder")
49            .or_else(|| v.get("cwd"))
50            .and_then(|x| x.as_str())
51    {
52        let p = s.strip_prefix("file://").unwrap_or(s);
53        return Some(PathBuf::from(p));
54    }
55    None
56}
57
58/// Parse one line from Copilot CLI `events.jsonl`.
59pub fn parse_copilot_cli_line(
60    session_id: &str,
61    seq: u64,
62    base_ts: u64,
63    line: &str,
64) -> Result<Option<Event>> {
65    let v: Value = serde_json::from_str(line.trim()).context("copilot cli jsonl")?;
66    let obj = match v.as_object() {
67        Some(o) => o,
68        None => return Ok(None),
69    };
70
71    let ts_ms = obj
72        .get("timestamp_ms")
73        .or_else(|| obj.get("timestamp"))
74        .and_then(|t| t.as_u64())
75        .unwrap_or(base_ts + seq);
76
77    if let Some(tool_calls) = obj.get("tool_calls").and_then(|t| t.as_array())
78        && let Some(first) = tool_calls.first()
79    {
80        let tool_name = first
81            .get("function")
82            .and_then(|f| f.get("name"))
83            .or_else(|| first.get("name"))
84            .and_then(|n| n.as_str())
85            .unwrap_or("")
86            .to_string();
87        let tokens_in = obj
88            .get("usage")
89            .and_then(|u| u.get("prompt_tokens"))
90            .and_then(|x| x.as_u64())
91            .map(|x| x as u32);
92        let tokens_out = obj
93            .get("usage")
94            .and_then(|u| u.get("completion_tokens"))
95            .and_then(|x| x.as_u64())
96            .map(|x| x as u32);
97        let line_model = model_from_json::from_object(obj);
98        let cost_usd_e6 =
99            estimate_tail_event_cost_usd_e6(line_model.as_deref(), tokens_in, tokens_out, None);
100        return Ok(Some(Event {
101            session_id: session_id.to_string(),
102            seq,
103            ts_ms,
104            ts_exact: true,
105            kind: EventKind::ToolCall,
106            source: EventSource::Tail,
107            tool: Some(tool_name),
108            tool_call_id: first
109                .get("id")
110                .and_then(|x| x.as_str())
111                .map(ToOwned::to_owned),
112            tokens_in,
113            tokens_out,
114            reasoning_tokens: None,
115            cost_usd_e6,
116            stop_reason: None,
117            latency_ms: None,
118            ttft_ms: None,
119            retry_count: None,
120            context_used_tokens: None,
121            context_max_tokens: None,
122            cache_creation_tokens: None,
123            cache_read_tokens: None,
124            system_prompt_tokens: None,
125            payload: v.clone(),
126        }));
127    }
128
129    if let Some(name) = obj
130        .get("tool")
131        .and_then(|t| t.get("name"))
132        .or_else(|| obj.get("toolName"))
133        .and_then(|n| n.as_str())
134    {
135        return Ok(Some(Event {
136            session_id: session_id.to_string(),
137            seq,
138            ts_ms,
139            ts_exact: true,
140            kind: EventKind::ToolCall,
141            source: EventSource::Tail,
142            tool: Some(name.to_string()),
143            tool_call_id: obj
144                .get("tool_call_id")
145                .or_else(|| obj.get("id"))
146                .and_then(|x| x.as_str())
147                .map(ToOwned::to_owned),
148            tokens_in: None,
149            tokens_out: None,
150            reasoning_tokens: None,
151            cost_usd_e6: None,
152            stop_reason: None,
153            latency_ms: None,
154            ttft_ms: None,
155            retry_count: None,
156            context_used_tokens: None,
157            context_max_tokens: None,
158            cache_creation_tokens: None,
159            cache_read_tokens: None,
160            system_prompt_tokens: None,
161            payload: v.clone(),
162        }));
163    }
164
165    Ok(None)
166}
167
168/// Scan one Copilot CLI session directory if it belongs to `workspace`.
169pub fn scan_copilot_cli_session_dir(
170    session_dir: &Path,
171    workspace: &Path,
172) -> Result<Option<(SessionRecord, Vec<Event>)>> {
173    let events_path = session_dir.join("events.jsonl");
174    if !events_path.is_file() {
175        return Ok(None);
176    }
177
178    let ws_match = if let Some(w) = session_workspace_path(session_dir) {
179        paths_equal(&w, workspace)
180    } else {
181        false
182    };
183    if !ws_match {
184        return Ok(None);
185    }
186
187    let session_id = session_dir
188        .file_name()
189        .and_then(|n| n.to_str())
190        .unwrap_or("copilot-cli")
191        .to_string();
192
193    let base_ts = dir_mtime_ms(session_dir);
194    let content = std::fs::read_to_string(&events_path)?;
195    let mut events = Vec::new();
196    let mut seq: u64 = 0;
197    let mut model: Option<String> = None;
198    for line in content.lines() {
199        if line.trim().is_empty() {
200            continue;
201        }
202        if let Ok(v) = serde_json::from_str::<Value>(line)
203            && let Some(m) = model_from_json::from_value(&v)
204        {
205            model = Some(m);
206        }
207        if let Some(ev) = parse_copilot_cli_line(&session_id, seq, base_ts, line)? {
208            events.push(ev);
209        }
210        seq += 1;
211    }
212
213    if events.is_empty() {
214        return Ok(None);
215    }
216
217    Ok(Some((
218        SessionRecord {
219            id: session_id,
220            agent: AGENT.to_string(),
221            model,
222            workspace: workspace.to_string_lossy().to_string(),
223            started_at_ms: dir_mtime_ms(session_dir),
224            ended_at_ms: None,
225            status: SessionStatus::Done,
226            trace_path: session_dir.to_string_lossy().to_string(),
227            start_commit: None,
228            end_commit: None,
229            branch: None,
230            dirty_start: None,
231            dirty_end: None,
232            repo_binding_source: None,
233            prompt_fingerprint: None,
234            parent_session_id: None,
235            agent_version: None,
236            os: None,
237            arch: None,
238            repo_file_count: None,
239            repo_total_loc: None,
240        },
241        events,
242    )))
243}
244
245/// All Copilot CLI sessions for this workspace.
246pub fn scan_copilot_cli_workspace(workspace: &Path) -> Result<Vec<(SessionRecord, Vec<Event>)>> {
247    let home = copilot_home();
248    let state = home.join("session-state");
249    if !state.is_dir() {
250        return Ok(vec![]);
251    }
252    let mut out = Vec::new();
253    for e in std::fs::read_dir(&state)? {
254        let e = e?;
255        let p = e.path();
256        if !p.is_dir() {
257            continue;
258        }
259        if let Some(pair) = scan_copilot_cli_session_dir(&p, workspace)? {
260            out.push(pair);
261        }
262    }
263    Ok(out)
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A `tool_calls` record becomes a `ToolCall` event named after the called function.
    #[test]
    fn copilot_cli_tool_calls_line() {
        let raw = r#"{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"run_terminal_cmd","arguments":"{}"}}],"timestamp_ms":1000}"#;
        let parsed = parse_copilot_cli_line("s1", 0, 0, raw).unwrap().unwrap();
        assert_eq!(parsed.kind, EventKind::ToolCall);
        assert_eq!(parsed.tool.as_deref(), Some("run_terminal_cmd"));
    }

    /// A session directory whose `workspace.json` points at the workspace is picked up.
    #[test]
    fn copilot_cli_session_fixture() {
        let tmp = TempDir::new().unwrap();
        let repo = tmp.path().join("repo");
        std::fs::create_dir_all(&repo).unwrap();
        let repo = std::fs::canonicalize(&repo).unwrap();

        let session_dir = tmp.path().join("session-state/sess-abc");
        std::fs::create_dir_all(&session_dir).unwrap();

        // Escape backslashes so a Windows path still forms valid JSON.
        let escaped_repo = repo.to_string_lossy().replace('\\', "\\\\");
        let workspace_json = format!(r#"{{"workspaceFolder": "{}"}}"#, escaped_repo);
        std::fs::write(session_dir.join("workspace.json"), workspace_json).unwrap();

        let event_line = r#"{"role":"assistant","tool_calls":[{"id":"c1","type":"function","function":{"name":"read_file","arguments":"{}"}}],"timestamp_ms":5000}"#;
        std::fs::write(session_dir.join("events.jsonl"), event_line).unwrap();

        let (record, events) = scan_copilot_cli_session_dir(&session_dir, &repo)
            .unwrap()
            .expect("pair");
        assert_eq!(record.agent, "copilot-cli");
        assert!(!events.is_empty());
    }
}