Skip to main content

kaizen/collect/tail/
copilot_cli.rs

// SPDX-License-Identifier: AGPL-3.0-or-later
//! Ingest GitHub Copilot CLI sessions from `~/.copilot/session-state/<id>/events.jsonl`.
3
4use crate::collect::model_from_json;
5use crate::collect::tail::dir_mtime_ms;
6use crate::core::event::{Event, EventKind, EventSource, SessionRecord, SessionStatus};
7use anyhow::{Context, Result};
8use serde_json::Value;
9use std::path::{Path, PathBuf};
10
/// Agent label stored in the `agent` field of the `SessionRecord` built by this module.
const AGENT: &str = "copilot-cli";
12
/// Resolve the Copilot CLI state root.
///
/// Precedence: `$COPILOT_HOME`, then `$HOME/.copilot`, then the relative path `.copilot`.
///
/// Variables are read with `var_os` so a home directory whose name is not valid Unicode is
/// still honoured, and an empty variable is treated as unset so it cannot produce an empty
/// root that silently resolves against the current directory.
fn copilot_home() -> PathBuf {
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    if let Some(root) = non_empty("COPILOT_HOME") {
        return PathBuf::from(root);
    }
    non_empty("HOME")
        .map(|home| PathBuf::from(home).join(".copilot"))
        .unwrap_or_else(|| PathBuf::from(".copilot"))
}
22
/// Canonicalize `p` via the filesystem, falling back to `p` unchanged when that fails
/// (for example because the path does not exist).
fn canonical(p: &Path) -> PathBuf {
    match p.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => p.to_path_buf(),
    }
}
26
27fn paths_equal(a: &Path, b: &Path) -> bool {
28    canonical(a) == canonical(b)
29}
30
31fn session_workspace_path(session_dir: &Path) -> Option<PathBuf> {
32    let wj = session_dir.join("workspace.json");
33    if let Ok(text) = std::fs::read_to_string(&wj)
34        && let Ok(v) = serde_json::from_str::<Value>(&text)
35    {
36        for key in ["workspaceFolder", "cwd", "workingDirectory", "folder"] {
37            if let Some(s) = v.get(key).and_then(|x| x.as_str()) {
38                let p = s.strip_prefix("file://").unwrap_or(s);
39                return Some(PathBuf::from(p));
40            }
41        }
42    }
43    let meta = session_dir.join("metadata.json");
44    if let Ok(text) = std::fs::read_to_string(&meta)
45        && let Ok(v) = serde_json::from_str::<Value>(&text)
46        && let Some(s) = v
47            .get("workspaceFolder")
48            .or_else(|| v.get("cwd"))
49            .and_then(|x| x.as_str())
50    {
51        let p = s.strip_prefix("file://").unwrap_or(s);
52        return Some(PathBuf::from(p));
53    }
54    None
55}
56
57/// Parse one line from Copilot CLI `events.jsonl`.
58pub fn parse_copilot_cli_line(
59    session_id: &str,
60    seq: u64,
61    base_ts: u64,
62    line: &str,
63) -> Result<Option<Event>> {
64    let v: Value = serde_json::from_str(line.trim()).context("copilot cli jsonl")?;
65    let obj = match v.as_object() {
66        Some(o) => o,
67        None => return Ok(None),
68    };
69
70    let ts_ms = obj
71        .get("timestamp_ms")
72        .or_else(|| obj.get("timestamp"))
73        .and_then(|t| t.as_u64())
74        .unwrap_or(base_ts + seq);
75
76    if let Some(tool_calls) = obj.get("tool_calls").and_then(|t| t.as_array())
77        && let Some(first) = tool_calls.first()
78    {
79        let tool_name = first
80            .get("function")
81            .and_then(|f| f.get("name"))
82            .or_else(|| first.get("name"))
83            .and_then(|n| n.as_str())
84            .unwrap_or("")
85            .to_string();
86        return Ok(Some(Event {
87            session_id: session_id.to_string(),
88            seq,
89            ts_ms,
90            ts_exact: true,
91            kind: EventKind::ToolCall,
92            source: EventSource::Tail,
93            tool: Some(tool_name),
94            tool_call_id: first
95                .get("id")
96                .and_then(|x| x.as_str())
97                .map(ToOwned::to_owned),
98            tokens_in: obj
99                .get("usage")
100                .and_then(|u| u.get("prompt_tokens"))
101                .and_then(|x| x.as_u64())
102                .map(|x| x as u32),
103            tokens_out: obj
104                .get("usage")
105                .and_then(|u| u.get("completion_tokens"))
106                .and_then(|x| x.as_u64())
107                .map(|x| x as u32),
108            reasoning_tokens: None,
109            cost_usd_e6: None,
110            payload: v.clone(),
111        }));
112    }
113
114    if let Some(name) = obj
115        .get("tool")
116        .and_then(|t| t.get("name"))
117        .or_else(|| obj.get("toolName"))
118        .and_then(|n| n.as_str())
119    {
120        return Ok(Some(Event {
121            session_id: session_id.to_string(),
122            seq,
123            ts_ms,
124            ts_exact: true,
125            kind: EventKind::ToolCall,
126            source: EventSource::Tail,
127            tool: Some(name.to_string()),
128            tool_call_id: obj
129                .get("tool_call_id")
130                .or_else(|| obj.get("id"))
131                .and_then(|x| x.as_str())
132                .map(ToOwned::to_owned),
133            tokens_in: None,
134            tokens_out: None,
135            reasoning_tokens: None,
136            cost_usd_e6: None,
137            payload: v.clone(),
138        }));
139    }
140
141    Ok(None)
142}
143
144/// Scan one Copilot CLI session directory if it belongs to `workspace`.
145pub fn scan_copilot_cli_session_dir(
146    session_dir: &Path,
147    workspace: &Path,
148) -> Result<Option<(SessionRecord, Vec<Event>)>> {
149    let events_path = session_dir.join("events.jsonl");
150    if !events_path.is_file() {
151        return Ok(None);
152    }
153
154    let ws_match = if let Some(w) = session_workspace_path(session_dir) {
155        paths_equal(&w, workspace)
156    } else {
157        false
158    };
159    if !ws_match {
160        return Ok(None);
161    }
162
163    let session_id = session_dir
164        .file_name()
165        .and_then(|n| n.to_str())
166        .unwrap_or("copilot-cli")
167        .to_string();
168
169    let base_ts = dir_mtime_ms(session_dir);
170    let content = std::fs::read_to_string(&events_path)?;
171    let mut events = Vec::new();
172    let mut seq: u64 = 0;
173    let mut model: Option<String> = None;
174    for line in content.lines() {
175        if line.trim().is_empty() {
176            continue;
177        }
178        if model.is_none()
179            && let Ok(v) = serde_json::from_str::<Value>(line)
180        {
181            model = model_from_json::from_value(&v);
182        }
183        if let Some(ev) = parse_copilot_cli_line(&session_id, seq, base_ts, line)? {
184            events.push(ev);
185        }
186        seq += 1;
187    }
188
189    if events.is_empty() {
190        return Ok(None);
191    }
192
193    Ok(Some((
194        SessionRecord {
195            id: session_id,
196            agent: AGENT.to_string(),
197            model,
198            workspace: workspace.to_string_lossy().to_string(),
199            started_at_ms: dir_mtime_ms(session_dir),
200            ended_at_ms: None,
201            status: SessionStatus::Done,
202            trace_path: session_dir.to_string_lossy().to_string(),
203            start_commit: None,
204            end_commit: None,
205            branch: None,
206            dirty_start: None,
207            dirty_end: None,
208            repo_binding_source: None,
209        },
210        events,
211    )))
212}
213
214/// All Copilot CLI sessions for this workspace.
215pub fn scan_copilot_cli_workspace(workspace: &Path) -> Result<Vec<(SessionRecord, Vec<Event>)>> {
216    let home = copilot_home();
217    let state = home.join("session-state");
218    if !state.is_dir() {
219        return Ok(vec![]);
220    }
221    let mut out = Vec::new();
222    for e in std::fs::read_dir(&state)? {
223        let e = e?;
224        let p = e.path();
225        if !p.is_dir() {
226            continue;
227        }
228        if let Some(pair) = scan_copilot_cli_session_dir(&p, workspace)? {
229            out.push(pair);
230        }
231    }
232    Ok(out)
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A `tool_calls` record becomes a tool-call event named after `function.name`.
    #[test]
    fn copilot_cli_tool_calls_line() {
        let record = r#"{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"run_terminal_cmd","arguments":"{}"}}],"timestamp_ms":1000}"#;

        let parsed = parse_copilot_cli_line("s1", 0, 0, record).unwrap();
        let event = parsed.unwrap();

        assert_eq!(event.kind, EventKind::ToolCall);
        assert_eq!(event.tool.as_deref(), Some("run_terminal_cmd"));
    }

    /// A session directory whose `workspace.json` points at the workspace is picked up.
    #[test]
    fn copilot_cli_session_fixture() {
        let tmp = TempDir::new().unwrap();
        let repo = tmp.path().join("repo");
        std::fs::create_dir_all(&repo).unwrap();
        let repo = std::fs::canonicalize(&repo).unwrap();

        let session_dir = tmp.path().join("session-state/sess-abc");
        std::fs::create_dir_all(&session_dir).unwrap();

        // Backslashes are doubled so a Windows path stays valid inside the JSON string.
        let workspace_json = format!(
            r#"{{"workspaceFolder": "{}"}}"#,
            repo.to_string_lossy().replace('\\', "\\\\")
        );
        std::fs::write(session_dir.join("workspace.json"), workspace_json).unwrap();

        let record_line = r#"{"role":"assistant","tool_calls":[{"id":"c1","type":"function","function":{"name":"read_file","arguments":"{}"}}],"timestamp_ms":5000}"#;
        std::fs::write(session_dir.join("events.jsonl"), record_line).unwrap();

        let (record, events) = scan_copilot_cli_session_dir(&session_dir, &repo)
            .unwrap()
            .expect("pair");
        assert_eq!(record.agent, "copilot-cli");
        assert!(!events.is_empty());
    }
}