use std::path::Path;
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct Pair {
pub user_prompt: String,
pub assistant_text: String,
}
pub const PAIR_MAX_CHARS: usize = 2_000;
pub const TOTAL_MAX_CHARS: usize = 40_000;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Platform {
ClaudeCode,
Cursor,
GeminiCli,
Windsurf,
}
impl Platform {
pub fn from_client_name(client: &str) -> Self {
match client.to_ascii_lowercase().as_str() {
"cursor" => Self::Cursor,
"gemini-cli" | "gemini_cli" | "gemini" => Self::GeminiCli,
"windsurf" => Self::Windsurf,
_ => Self::ClaudeCode,
}
}
}
#[derive(Debug, Clone, Copy)]
pub struct ExtractArgs<'a> {
pub platform: Platform,
pub transcript_path: Option<&'a str>,
pub session_id: Option<&'a str>,
pub max_pairs: usize,
}
pub fn extract_recent_session_pairs(args: ExtractArgs<'_>) -> std::io::Result<Vec<Pair>> {
if args.max_pairs == 0 {
return Ok(Vec::new());
}
match args.platform {
Platform::ClaudeCode => extract_from_claude_code(&args),
_ => Ok(Vec::new()),
}
}
const MAX_TRANSCRIPT_BYTES: u64 = 32 * 1024 * 1024;
fn read_transcript_tail_capped(path: &str) -> std::io::Result<String> {
use std::io::{Read, Seek, SeekFrom};
let mut file = std::fs::File::open(path)?;
let len = file.metadata()?.len();
if len > MAX_TRANSCRIPT_BYTES {
file.seek(SeekFrom::Start(len - MAX_TRANSCRIPT_BYTES))?;
}
let mut buf = Vec::new();
file.take(MAX_TRANSCRIPT_BYTES).read_to_end(&mut buf)?;
Ok(String::from_utf8_lossy(&buf).into_owned())
}
fn extract_from_claude_code(args: &ExtractArgs<'_>) -> std::io::Result<Vec<Pair>> {
let Some(transcript_path) = args.transcript_path else {
return Ok(Vec::new());
};
if transcript_path.trim().is_empty() {
return Ok(Vec::new());
}
if !Path::new(transcript_path).exists() {
return Ok(Vec::new());
}
let body = read_transcript_tail_capped(transcript_path)?;
let mut pending_user: Option<String> = None;
let mut pairs: Vec<Pair> = Vec::new();
let mut total_chars: usize = 0;
for line in body.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
let value: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
match row_role(&value) {
Some(Role::User) => {
pending_user = extract_text_content(&value);
}
Some(Role::Assistant) => {
let assistant_text = extract_text_content(&value).unwrap_or_default();
if assistant_text.trim().is_empty() {
continue;
}
let Some(user_prompt) = pending_user.take() else {
continue;
};
if user_prompt.trim().is_empty() {
continue;
}
let pair = build_capped_pair(&user_prompt, &assistant_text);
let pair_len = pair.user_prompt.chars().count()
+ pair.assistant_text.chars().count();
if total_chars.saturating_add(pair_len) > TOTAL_MAX_CHARS {
break;
}
total_chars += pair_len;
pairs.push(pair);
}
_ => {}
}
}
let len = pairs.len();
let start = len.saturating_sub(args.max_pairs);
Ok(pairs.split_off(start))
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Role {
User,
Assistant,
}
fn row_role(value: &serde_json::Value) -> Option<Role> {
let top = value.get("type").and_then(serde_json::Value::as_str);
let nested = value
.get("message")
.and_then(|m| m.get("role"))
.and_then(serde_json::Value::as_str);
let any = top.or(nested)?;
match any {
"user" => Some(Role::User),
"assistant" => Some(Role::Assistant),
_ => None,
}
}
fn extract_text_content(value: &serde_json::Value) -> Option<String> {
let content = value.get("message").and_then(|m| m.get("content"))?;
if let Some(s) = content.as_str() {
return Some(s.to_owned());
}
let arr = content.as_array()?;
let mut buf = String::new();
for part in arr {
let part_type = part
.get("type")
.and_then(serde_json::Value::as_str)
.unwrap_or("");
if part_type != "text" {
continue;
}
if let Some(text) = part.get("text").and_then(serde_json::Value::as_str) {
if !buf.is_empty() {
buf.push('\n');
}
buf.push_str(text);
}
}
if buf.is_empty() { None } else { Some(buf) }
}
fn build_capped_pair(user_prompt: &str, assistant_text: &str) -> Pair {
let user = truncate_chars(user_prompt.trim(), PAIR_MAX_CHARS / 2);
let assistant = truncate_chars(assistant_text.trim(), PAIR_MAX_CHARS / 2);
Pair {
user_prompt: user,
assistant_text: assistant,
}
}
fn truncate_chars(s: &str, max_chars: usize) -> String {
if s.chars().count() <= max_chars {
return s.to_owned();
}
let mut out: String = s.chars().take(max_chars.saturating_sub(1)).collect();
out.push('…');
out
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
fn write_jsonl(lines: &[&str]) -> tempfile::NamedTempFile {
let mut f = tempfile::Builder::new()
.suffix(".jsonl")
.tempfile()
.expect("tempfile");
for line in lines {
writeln!(f, "{line}").expect("write jsonl line");
}
f
}
fn cc_args(transcript_path: Option<&str>, max_pairs: usize) -> ExtractArgs<'_> {
ExtractArgs {
platform: Platform::ClaudeCode,
transcript_path,
session_id: Some("sess_t"),
max_pairs,
}
}
#[test]
fn missing_transcript_path_returns_empty_not_error() {
let args = cc_args(None, 10);
let pairs = extract_recent_session_pairs(args).expect("no panic");
assert!(pairs.is_empty());
let args = cc_args(Some("/definitely/does/not/exist"), 10);
let pairs = extract_recent_session_pairs(args).expect("no panic");
assert!(pairs.is_empty());
}
#[test]
fn extracts_pairs_in_order_and_strips_tool_calls() {
let f = write_jsonl(&[
r#"{"type":"user","message":{"role":"user","content":"please fix the bug"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Edit","input":{}},{"type":"text","text":"fixed the panic in unwrap"}]}}"#,
r#"{"type":"user","message":{"role":"user","content":"add tests"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"thinking","thinking":"hmm"},{"type":"text","text":"added two test cases"}]}}"#,
]);
let path = f.path().to_str().unwrap().to_owned();
let pairs = extract_recent_session_pairs(cc_args(Some(&path), 10)).expect("ok");
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0].user_prompt, "please fix the bug");
assert_eq!(pairs[0].assistant_text, "fixed the panic in unwrap");
assert_eq!(pairs[1].user_prompt, "add tests");
assert_eq!(pairs[1].assistant_text, "added two test cases");
}
#[test]
fn drops_thinking_blocks_from_assistant_text() {
let f = write_jsonl(&[
r#"{"type":"user","message":{"role":"user","content":"q"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"thinking","thinking":"private chain-of-thought"},{"type":"text","text":"final answer"}]}}"#,
]);
let path = f.path().to_str().unwrap().to_owned();
let pairs = extract_recent_session_pairs(cc_args(Some(&path), 10)).expect("ok");
assert_eq!(pairs.len(), 1);
assert_eq!(pairs[0].assistant_text, "final answer");
assert!(
!pairs[0]
.assistant_text
.contains("private chain-of-thought"),
"thinking blocks must never reach the gate"
);
}
#[test]
fn max_pairs_limits_returned_window_to_recent_n() {
let f = write_jsonl(&[
r#"{"type":"user","message":{"role":"user","content":"u1"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"a1"}]}}"#,
r#"{"type":"user","message":{"role":"user","content":"u2"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"a2"}]}}"#,
r#"{"type":"user","message":{"role":"user","content":"u3"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"a3"}]}}"#,
]);
let path = f.path().to_str().unwrap().to_owned();
let pairs = extract_recent_session_pairs(cc_args(Some(&path), 2)).expect("ok");
assert_eq!(pairs.len(), 2);
assert_eq!(pairs[0].user_prompt, "u2");
assert_eq!(pairs[1].user_prompt, "u3");
}
#[test]
fn empty_assistant_text_pair_is_dropped() {
let f = write_jsonl(&[
r#"{"type":"user","message":{"role":"user","content":"go"}}"#,
r#"{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","id":"t1","name":"Edit","input":{}}]}}"#,
]);
let path = f.path().to_str().unwrap().to_owned();
let pairs = extract_recent_session_pairs(cc_args(Some(&path), 10)).expect("ok");
assert!(pairs.is_empty());
}
#[test]
fn cursor_platform_returns_empty_until_adapter_lands() {
let args = ExtractArgs {
platform: Platform::Cursor,
transcript_path: Some("/tmp/whatever"),
session_id: Some("sess"),
max_pairs: 10,
};
let pairs = extract_recent_session_pairs(args).expect("ok");
assert!(pairs.is_empty());
}
}