Skip to main content

kaizen/collect/tail/
copilot_cli.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Ingest GitHub Copilot CLI sessions from `~/.copilot/session-state/<id>/events.jsonl`.
3
4use crate::collect::model_from_json;
5use crate::collect::tail::dir_mtime_ms;
6use crate::core::event::{Event, EventKind, EventSource, SessionRecord, SessionStatus};
7use anyhow::{Context, Result};
8use serde_json::Value;
9use std::path::{Path, PathBuf};
10
/// Agent label written into the `agent` field of every `SessionRecord` built by this module.
11const AGENT: &str = "copilot-cli";
12
/// Resolve the Copilot CLI state directory.
///
/// Lookup order:
/// 1. `$COPILOT_HOME`, used verbatim;
/// 2. `$HOME/.copilot`;
/// 3. the relative path `.copilot` when neither variable is set.
///
/// The variables are read with `std::env::var_os` rather than `std::env::var`
/// so that a directory whose name is not valid UTF-8 is still honoured instead
/// of being silently treated as "unset".
fn copilot_home() -> PathBuf {
    if let Some(explicit) = std::env::var_os("COPILOT_HOME") {
        return PathBuf::from(explicit);
    }
    match std::env::var_os("HOME") {
        Some(home) => PathBuf::from(home).join(".copilot"),
        None => PathBuf::from(".copilot"),
    }
}
22
/// Best-effort canonicalisation of `p`.
///
/// Returns the result of `std::fs::canonicalize` when it succeeds; on any
/// error (for example, the path does not exist) the input is returned unchanged
/// as an owned `PathBuf`.
fn canonical(p: &Path) -> PathBuf {
    match std::fs::canonicalize(p) {
        Ok(resolved) => resolved,
        Err(_) => p.to_path_buf(),
    }
}
26
27fn paths_equal(a: &Path, b: &Path) -> bool {
28    canonical(a) == canonical(b)
29}
30
31fn session_workspace_path(session_dir: &Path) -> Option<PathBuf> {
32    let wj = session_dir.join("workspace.json");
33    if let Ok(text) = std::fs::read_to_string(&wj)
34        && let Ok(v) = serde_json::from_str::<Value>(&text)
35    {
36        for key in ["workspaceFolder", "cwd", "workingDirectory", "folder"] {
37            if let Some(s) = v.get(key).and_then(|x| x.as_str()) {
38                let p = s.strip_prefix("file://").unwrap_or(s);
39                return Some(PathBuf::from(p));
40            }
41        }
42    }
43    let meta = session_dir.join("metadata.json");
44    if let Ok(text) = std::fs::read_to_string(&meta)
45        && let Ok(v) = serde_json::from_str::<Value>(&text)
46        && let Some(s) = v
47            .get("workspaceFolder")
48            .or_else(|| v.get("cwd"))
49            .and_then(|x| x.as_str())
50    {
51        let p = s.strip_prefix("file://").unwrap_or(s);
52        return Some(PathBuf::from(p));
53    }
54    None
55}
56
57/// Parse one line from Copilot CLI `events.jsonl`.
58pub fn parse_copilot_cli_line(
59    session_id: &str,
60    seq: u64,
61    base_ts: u64,
62    line: &str,
63) -> Result<Option<Event>> {
64    let v: Value = serde_json::from_str(line.trim()).context("copilot cli jsonl")?;
65    let obj = match v.as_object() {
66        Some(o) => o,
67        None => return Ok(None),
68    };
69
70    let ts_ms = obj
71        .get("timestamp_ms")
72        .or_else(|| obj.get("timestamp"))
73        .and_then(|t| t.as_u64())
74        .unwrap_or(base_ts + seq);
75
76    if let Some(tool_calls) = obj.get("tool_calls").and_then(|t| t.as_array())
77        && let Some(first) = tool_calls.first()
78    {
79        let tool_name = first
80            .get("function")
81            .and_then(|f| f.get("name"))
82            .or_else(|| first.get("name"))
83            .and_then(|n| n.as_str())
84            .unwrap_or("")
85            .to_string();
86        return Ok(Some(Event {
87            session_id: session_id.to_string(),
88            seq,
89            ts_ms,
90            ts_exact: true,
91            kind: EventKind::ToolCall,
92            source: EventSource::Tail,
93            tool: Some(tool_name),
94            tool_call_id: first
95                .get("id")
96                .and_then(|x| x.as_str())
97                .map(ToOwned::to_owned),
98            tokens_in: obj
99                .get("usage")
100                .and_then(|u| u.get("prompt_tokens"))
101                .and_then(|x| x.as_u64())
102                .map(|x| x as u32),
103            tokens_out: obj
104                .get("usage")
105                .and_then(|u| u.get("completion_tokens"))
106                .and_then(|x| x.as_u64())
107                .map(|x| x as u32),
108            reasoning_tokens: None,
109            cost_usd_e6: None,
110            stop_reason: None,
111            latency_ms: None,
112            ttft_ms: None,
113            retry_count: None,
114            context_used_tokens: None,
115            context_max_tokens: None,
116            cache_creation_tokens: None,
117            cache_read_tokens: None,
118            system_prompt_tokens: None,
119            payload: v.clone(),
120        }));
121    }
122
123    if let Some(name) = obj
124        .get("tool")
125        .and_then(|t| t.get("name"))
126        .or_else(|| obj.get("toolName"))
127        .and_then(|n| n.as_str())
128    {
129        return Ok(Some(Event {
130            session_id: session_id.to_string(),
131            seq,
132            ts_ms,
133            ts_exact: true,
134            kind: EventKind::ToolCall,
135            source: EventSource::Tail,
136            tool: Some(name.to_string()),
137            tool_call_id: obj
138                .get("tool_call_id")
139                .or_else(|| obj.get("id"))
140                .and_then(|x| x.as_str())
141                .map(ToOwned::to_owned),
142            tokens_in: None,
143            tokens_out: None,
144            reasoning_tokens: None,
145            cost_usd_e6: None,
146            stop_reason: None,
147            latency_ms: None,
148            ttft_ms: None,
149            retry_count: None,
150            context_used_tokens: None,
151            context_max_tokens: None,
152            cache_creation_tokens: None,
153            cache_read_tokens: None,
154            system_prompt_tokens: None,
155            payload: v.clone(),
156        }));
157    }
158
159    Ok(None)
160}
161
162/// Scan one Copilot CLI session directory if it belongs to `workspace`.
163pub fn scan_copilot_cli_session_dir(
164    session_dir: &Path,
165    workspace: &Path,
166) -> Result<Option<(SessionRecord, Vec<Event>)>> {
167    let events_path = session_dir.join("events.jsonl");
168    if !events_path.is_file() {
169        return Ok(None);
170    }
171
172    let ws_match = if let Some(w) = session_workspace_path(session_dir) {
173        paths_equal(&w, workspace)
174    } else {
175        false
176    };
177    if !ws_match {
178        return Ok(None);
179    }
180
181    let session_id = session_dir
182        .file_name()
183        .and_then(|n| n.to_str())
184        .unwrap_or("copilot-cli")
185        .to_string();
186
187    let base_ts = dir_mtime_ms(session_dir);
188    let content = std::fs::read_to_string(&events_path)?;
189    let mut events = Vec::new();
190    let mut seq: u64 = 0;
191    let mut model: Option<String> = None;
192    for line in content.lines() {
193        if line.trim().is_empty() {
194            continue;
195        }
196        if model.is_none()
197            && let Ok(v) = serde_json::from_str::<Value>(line)
198        {
199            model = model_from_json::from_value(&v);
200        }
201        if let Some(ev) = parse_copilot_cli_line(&session_id, seq, base_ts, line)? {
202            events.push(ev);
203        }
204        seq += 1;
205    }
206
207    if events.is_empty() {
208        return Ok(None);
209    }
210
211    Ok(Some((
212        SessionRecord {
213            id: session_id,
214            agent: AGENT.to_string(),
215            model,
216            workspace: workspace.to_string_lossy().to_string(),
217            started_at_ms: dir_mtime_ms(session_dir),
218            ended_at_ms: None,
219            status: SessionStatus::Done,
220            trace_path: session_dir.to_string_lossy().to_string(),
221            start_commit: None,
222            end_commit: None,
223            branch: None,
224            dirty_start: None,
225            dirty_end: None,
226            repo_binding_source: None,
227            prompt_fingerprint: None,
228            parent_session_id: None,
229            agent_version: None,
230            os: None,
231            arch: None,
232            repo_file_count: None,
233            repo_total_loc: None,
234        },
235        events,
236    )))
237}
238
239/// All Copilot CLI sessions for this workspace.
240pub fn scan_copilot_cli_workspace(workspace: &Path) -> Result<Vec<(SessionRecord, Vec<Event>)>> {
241    let home = copilot_home();
242    let state = home.join("session-state");
243    if !state.is_dir() {
244        return Ok(vec![]);
245    }
246    let mut out = Vec::new();
247    for e in std::fs::read_dir(&state)? {
248        let e = e?;
249        let p = e.path();
250        if !p.is_dir() {
251            continue;
252        }
253        if let Some(pair) = scan_copilot_cli_session_dir(&p, workspace)? {
254            out.push(pair);
255        }
256    }
257    Ok(out)
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A record with a `tool_calls` array is parsed into a `ToolCall` event
    /// named after the first call's function.
    #[test]
    fn copilot_cli_tool_calls_line() {
        let record = r#"{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"run_terminal_cmd","arguments":"{}"}}],"timestamp_ms":1000}"#;
        let parsed = parse_copilot_cli_line("s1", 0, 0, record).unwrap();
        let event = parsed.unwrap();
        assert_eq!(event.kind, EventKind::ToolCall);
        assert_eq!(event.tool.as_deref(), Some("run_terminal_cmd"));
    }

    /// A session directory whose `workspace.json` points at the workspace and
    /// whose `events.jsonl` holds one tool call is picked up by the scanner.
    #[test]
    fn copilot_cli_session_fixture() {
        let tmp = TempDir::new().unwrap();

        // Workspace the session claims to belong to, in canonical form.
        let repo = tmp.path().join("repo");
        std::fs::create_dir_all(&repo).unwrap();
        let repo_canon = std::fs::canonicalize(&repo).unwrap();

        // Session directory with its workspace pointer and a single event.
        let session_dir = tmp.path().join("session-state/sess-abc");
        std::fs::create_dir_all(&session_dir).unwrap();
        // Backslashes are doubled so Windows paths stay valid inside JSON.
        let escaped_ws = repo_canon.to_string_lossy().replace('\\', "\\\\");
        let workspace_json = format!(r#"{{"workspaceFolder": "{}"}}"#, escaped_ws);
        std::fs::write(session_dir.join("workspace.json"), workspace_json).unwrap();
        let record = r#"{"role":"assistant","tool_calls":[{"id":"c1","type":"function","function":{"name":"read_file","arguments":"{}"}}],"timestamp_ms":5000}"#;
        std::fs::write(session_dir.join("events.jsonl"), record).unwrap();

        let (session, events) = scan_copilot_cli_session_dir(&session_dir, &repo_canon)
            .unwrap()
            .expect("pair");
        assert_eq!(session.agent, "copilot-cli");
        assert!(!events.is_empty());
    }
}
295            .expect("pair");
296        assert_eq!(pair.0.agent, "copilot-cli");
297        assert!(!pair.1.is_empty());
298    }
299}