use crate::adapters::{Adapter, AdapterError, RawRecord};
use crate::storage::LedgerRow;
use serde::{Deserialize, Serialize};
use std::io::{Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
/// Values of a transcript line's top-level `type` field that `parse` skips
/// instead of turning into ledger rows.
const NON_CONVERSATION_TYPES: &[&str] = &[
"file-history-snapshot",
"permission-mode",
"queue-operation",
"summary",
];
/// Resume position for incremental reads of a transcript file.
///
/// The `Default` value (empty path, offset 0, no uuid) makes
/// `read_new_records` discover the newest transcript under the projects root.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ClaudeCursor {
/// Transcript file being read; an empty or no-longer-existing path triggers
/// rediscovery in `read_new_records`.
pub file_path: PathBuf,
/// Byte offset into `file_path` at which the next read starts.
pub byte_offset: u64,
/// `uuid` of the most recent record that carried one, if any has been seen.
pub last_uuid: Option<String>,
}
/// Adapter that reads transcript files from a projects directory.
pub struct ClaudeAdapter {
/// Explicit projects directory; when `None`, `$HOME/.claude/projects` is used.
pub projects_root: Option<PathBuf>,
}
impl ClaudeAdapter {
    /// Creates an adapter with no explicit projects directory; the location is
    /// derived from the user's home directory when it is first needed.
    pub fn new() -> Self {
        Self { projects_root: None }
    }

    /// Creates an adapter pinned to `root` as its projects directory.
    pub fn with_projects_root(root: PathBuf) -> Self {
        Self {
            projects_root: Some(root),
        }
    }

    /// Resolves the projects directory this adapter works against.
    ///
    /// An explicitly configured root wins; otherwise the result is
    /// `<home>/.claude/projects`.
    ///
    /// # Errors
    /// Returns `AdapterError::PathNotFound("$HOME")` when no root is configured
    /// and the home directory cannot be determined.
    fn projects_root(&self) -> Result<PathBuf, AdapterError> {
        match &self.projects_root {
            Some(explicit) => Ok(explicit.clone()),
            None => dirs::home_dir()
                .map(|home| home.join(".claude").join("projects"))
                .ok_or_else(|| AdapterError::PathNotFound(PathBuf::from("$HOME"))),
        }
    }
}
impl Default for ClaudeAdapter {
fn default() -> Self {
Self::new()
}
}
impl Adapter for ClaudeAdapter {
    type Cursor = ClaudeCursor;

    /// Identifier of this adapter; stamped on every record and row it produces.
    fn name(&self) -> &'static str {
        "claude"
    }

    /// Returns the projects directory when it exists on disk, `None` otherwise.
    ///
    /// # Errors
    /// Fails only when the projects directory cannot be resolved at all.
    fn detect(&self) -> Result<Option<PathBuf>, AdapterError> {
        let root = self.projects_root()?;
        Ok(root.exists().then_some(root))
    }

    /// Reads every complete line appended to the transcript since `since`.
    ///
    /// When the cursor has no path, or its file no longer exists, the most
    /// recently modified `*.jsonl` under the projects directory is selected and
    /// read from offset 0. If there is no transcript at all, an empty batch and
    /// a cursor with an empty path are returned (keeping the previous
    /// `last_uuid`).
    ///
    /// Returns the raw records plus the cursor to pass to the next call.
    ///
    /// # Errors
    /// * `AdapterError::PathNotFound` when the root or the file cannot be
    ///   canonicalized, or the file resolves to a location outside the root.
    /// * Any error raised by `read_complete_lines`.
    fn read_new_records(
        &self,
        since: &Self::Cursor,
    ) -> Result<(Vec<RawRecord>, Self::Cursor), AdapterError> {
        let root = self.projects_root()?;

        // Decide which file to read and where to start. Discovery is resolved
        // here in one step rather than by calling this method again, so a
        // transcript vanishing right after discovery surfaces as PathNotFound
        // below instead of triggering another round of recursion.
        let cursor_is_usable =
            !since.file_path.as_os_str().is_empty() && since.file_path.exists();
        let (file_path, start_offset, carried_uuid) = if cursor_is_usable {
            (
                since.file_path.clone(),
                since.byte_offset,
                since.last_uuid.clone(),
            )
        } else {
            let Some(newest) = find_newest_transcript(&root) else {
                return Ok((
                    Vec::new(),
                    ClaudeCursor {
                        file_path: PathBuf::new(),
                        byte_offset: 0,
                        last_uuid: since.last_uuid.clone(),
                    },
                ));
            };
            // A freshly discovered file starts from scratch: offset 0, no uuid.
            (newest, 0, None)
        };

        // Containment check: after resolving symlinks and `..`, the file must
        // still live under the projects root.
        let canonical_root = root
            .canonicalize()
            .map_err(|_| AdapterError::PathNotFound(root.clone()))?;
        let canonical_file = file_path
            .canonicalize()
            .map_err(|_| AdapterError::PathNotFound(file_path.clone()))?;
        if !canonical_file.starts_with(&canonical_root) {
            return Err(AdapterError::PathNotFound(file_path));
        }

        let tool = self.name();
        let (lines, new_offset) = read_complete_lines(&file_path, start_offset, tool)?;
        let records: Vec<RawRecord> = lines
            .into_iter()
            .map(|(end_offset, payload)| RawRecord {
                tool: tool.to_string(),
                payload,
                offset: end_offset,
            })
            .collect();

        // Newest record carrying a `uuid` wins; otherwise keep the previous one.
        let last_uuid = records
            .iter()
            .rev()
            .find_map(|rec| extract_uuid_from_payload(&rec.payload))
            .or(carried_uuid);

        Ok((
            records,
            ClaudeCursor {
                file_path,
                byte_offset: new_offset,
                last_uuid,
            },
        ))
    }

    /// Converts raw transcript lines into ledger rows.
    ///
    /// Lines are skipped (not errors) when they are blank, when their `type`
    /// is listed in `NON_CONVERSATION_TYPES`, or when they carry neither a
    /// `timestamp` nor a `ts` field.
    ///
    /// # Errors
    /// Returns `AdapterError::Parse` (tagged with the record's offset) for
    /// invalid JSON, a missing `sessionId`, an unparseable timestamp, a line
    /// with neither a role nor a `type`, or content that cannot be serialized.
    fn parse(&self, records: Vec<RawRecord>) -> Result<Vec<LedgerRow>, AdapterError> {
        let mut rows = Vec::with_capacity(records.len());
        for rec in records {
            let offset = rec.offset;

            // A line holding only JSON whitespace contains no record; skipping
            // it keeps one stray blank line from failing the whole batch.
            let is_blank = rec
                .payload
                .iter()
                .all(|b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'));
            if is_blank {
                continue;
            }

            let v: serde_json::Value =
                serde_json::from_slice(&rec.payload).map_err(|e| AdapterError::Parse {
                    offset,
                    context: "invalid JSON in transcript line",
                    source: e,
                })?;

            let type_field = v.get("type").and_then(|t| t.as_str());
            if NON_CONVERSATION_TYPES.contains(&type_field.unwrap_or("")) {
                continue;
            }

            let session_id = v
                .get("sessionId")
                .and_then(|s| s.as_str())
                .ok_or_else(|| AdapterError::Parse {
                    offset,
                    context: "missing sessionId field",
                    source: make_missing_field_error(),
                })?
                .to_string();

            // Lines without any timestamp are dropped rather than rejected.
            let Some(ts_val) = v.get("timestamp").or_else(|| v.get("ts")) else {
                continue;
            };
            let ts = parse_timestamp(Some(ts_val), offset)?;

            // Role lookup order: message.role, top-level role, then `type`.
            let message = v.get("message");
            let role = message
                .and_then(|m| m.get("role"))
                .or_else(|| v.get("role"))
                .and_then(|r| r.as_str())
                .or(type_field)
                .ok_or_else(|| AdapterError::Parse {
                    offset,
                    context: "missing role and type fields",
                    source: make_missing_field_error(),
                })?
                .to_string();

            // Content lookup order: message.content, then top-level content.
            let content_val = message
                .and_then(|m| m.get("content"))
                .or_else(|| v.get("content"));
            let content = match content_val {
                None => String::new(),
                Some(serde_json::Value::String(text)) => text.clone(),
                // Anything that is not a plain string is stored as its JSON text.
                Some(other) => serde_json::to_string(other).map_err(|e| AdapterError::Parse {
                    offset,
                    context: "failed to serialize content array",
                    source: e,
                })?,
            };

            let parent_id = v
                .get("parentUuid")
                .and_then(|p| p.as_str())
                .map(str::to_string);

            rows.push(LedgerRow {
                session_id,
                tool: self.name().to_string(),
                ts,
                role,
                content,
                tool_calls_json: extract_tool_calls(content_val),
                files_touched_json: extract_files_touched(content_val),
                parent_id,
            });
        }
        Ok(rows)
    }
}
/// Finds the most recently modified `*.jsonl` file located exactly one
/// directory level below `projects_root`.
///
/// Returns `None` when the root cannot be listed or no such file exists.
/// Entries that cannot be listed or whose modification time cannot be read are
/// silently passed over. On equal modification times the file encountered
/// first is kept.
fn find_newest_transcript(projects_root: &Path) -> Option<PathBuf> {
    let project_dirs = std::fs::read_dir(projects_root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|dir| dir.is_dir());

    let mut best: Option<(std::time::SystemTime, PathBuf)> = None;
    for dir in project_dirs {
        let Ok(listing) = std::fs::read_dir(&dir) else {
            continue;
        };
        let transcripts = listing
            .flatten()
            .map(|entry| entry.path())
            .filter(|file| file.extension().and_then(|ext| ext.to_str()) == Some("jsonl"));

        for candidate in transcripts {
            let Ok(mtime) = candidate.metadata().and_then(|meta| meta.modified()) else {
                continue;
            };
            // Strictly-newer comparison: ties keep the earlier find.
            let is_newer = match &best {
                Some((seen, _)) => mtime > *seen,
                None => true,
            };
            if is_newer {
                best = Some((mtime, candidate));
            }
        }
    }
    best.map(|(_, path)| path)
}
/// Lines produced by `read_complete_lines`: each entry pairs the absolute file
/// offset just past the line's trailing newline with the line's bytes
/// (newline included).
type LineRecords = Vec<(u64, Vec<u8>)>;
/// Size in bytes (64 MiB) of the read window `read_complete_lines` requests
/// from the file per call.
const MAX_READ_BYTES_PER_POLL: u64 = 64 * 1024 * 1024;
/// Reads newline-terminated lines from `file_path`, starting at `from_offset`.
///
/// Returns the complete lines found — each paired with the absolute file
/// offset just past its trailing `\n` — together with the offset at which the
/// next call should resume. A trailing fragment with no newline yet is left
/// unread, so a line that is still being written is never emitted half-done.
///
/// A window of `MAX_READ_BYTES_PER_POLL` bytes is requested per call. If that
/// entire window contains no newline, a single line is longer than the window;
/// reading then continues up to that line's terminator. Without this the
/// function would return nothing and the same offset on every call, and the
/// caller's cursor could never move past the oversized line.
///
/// `tool` is accepted for signature compatibility and is not used.
///
/// # Errors
/// Propagates I/O errors from opening, seeking, or reading the file.
fn read_complete_lines(
    file_path: &Path,
    from_offset: u64,
    tool: &str,
) -> Result<(LineRecords, u64), AdapterError> {
    let _ = tool;
    let mut file = std::fs::File::open(file_path)?;
    file.seek(SeekFrom::Start(from_offset))?;

    let mut buf = Vec::new();
    Read::by_ref(&mut file)
        .take(MAX_READ_BYTES_PER_POLL)
        .read_to_end(&mut buf)?;
    if buf.is_empty() {
        return Ok((Vec::new(), from_offset));
    }

    // Oversized-line escape hatch: only taken when the window filled up
    // without a single newline in it.
    let window_full = buf.len() as u64 >= MAX_READ_BYTES_PER_POLL;
    if window_full && !buf.contains(&b'\n') {
        let mut rest = std::io::BufReader::new(file);
        std::io::BufRead::read_until(&mut rest, b'\n', &mut buf)?;
    }

    let mut records = Vec::new();
    let mut next_offset = from_offset;
    for line in buf.split_inclusive(|&b| b == b'\n') {
        // Only the final chunk can lack a newline: that is the partial tail.
        if !line.ends_with(b"\n") {
            break;
        }
        next_offset += line.len() as u64;
        records.push((next_offset, line.to_vec()));
    }
    Ok((records, next_offset))
}
/// Interprets a JSON timestamp value as an `i64`.
///
/// Accepted forms:
/// * a JSON number representable as `i64` (returned unchanged),
/// * a string holding a decimal `i64` (returned unchanged),
/// * a string in RFC 3339 format (converted to epoch milliseconds).
///
/// # Errors
/// Every other input — including `None` — yields `AdapterError::Parse` tagged
/// with `offset` and a context string describing which form failed.
fn parse_timestamp(val: Option<&serde_json::Value>, offset: u64) -> Result<i64, AdapterError> {
    // Each arm either returns the parsed value or yields the failure context.
    let context = match val {
        Some(serde_json::Value::Number(num)) => match num.as_i64() {
            Some(value) => return Ok(value),
            None => "ts field is not a valid i64",
        },
        Some(serde_json::Value::String(text)) => {
            if let Ok(value) = text.parse::<i64>() {
                return Ok(value);
            }
            match chrono::DateTime::parse_from_rfc3339(text) {
                Ok(moment) => return Ok(moment.timestamp_millis()),
                Err(_) => "ts/timestamp field is not parseable as epoch ms or ISO-8601",
            }
        }
        _ => "missing or non-numeric ts field",
    };
    Err(AdapterError::Parse {
        offset,
        context,
        source: make_missing_field_error(),
    })
}
/// Collects the `tool_use` items of a content array and returns them as a JSON
/// array string.
///
/// Returns `None` when `content` is absent, is not an array, contains no item
/// whose `type` is `"tool_use"`, or serialization fails.
fn extract_tool_calls(content: Option<&serde_json::Value>) -> Option<String> {
    let items = content.and_then(|value| value.as_array())?;

    let mut calls: Vec<&serde_json::Value> = Vec::new();
    for item in items {
        if item.get("type").and_then(|t| t.as_str()) == Some("tool_use") {
            calls.push(item);
        }
    }

    if calls.is_empty() {
        None
    } else {
        serde_json::to_string(&calls).ok()
    }
}
/// Collects the file paths named by the `tool_use` items of a content array
/// and returns them as a JSON array string.
///
/// For each `tool_use` item, the first string-valued key among `path`,
/// `file_path` and `notebook_path` inside its `input` object is taken.
/// `path` is checked first, so items that already yielded a path keep
/// yielding the same one.
///
/// NOTE(review): `file_path` / `notebook_path` are the keys Claude Code's
/// file and notebook tools are documented to use — confirm against real
/// transcripts.
///
/// Returns `None` when `content` is absent, is not an array, yields no path,
/// or serialization fails.
fn extract_files_touched(content: Option<&serde_json::Value>) -> Option<String> {
    // Lookup order matters: `path` first preserves earlier results.
    const PATH_KEYS: [&str; 3] = ["path", "file_path", "notebook_path"];

    let items = content?.as_array()?;
    let paths: Vec<&str> = items
        .iter()
        .filter(|item| item.get("type").and_then(|t| t.as_str()) == Some("tool_use"))
        .filter_map(|item| {
            let input = item.get("input")?;
            PATH_KEYS
                .iter()
                .find_map(|key| input.get(*key).and_then(|p| p.as_str()))
        })
        .collect();

    if paths.is_empty() {
        return None;
    }
    serde_json::to_string(&paths).ok()
}
/// Pulls the top-level string field `uuid` out of a JSON payload.
///
/// Returns `None` when the payload is not valid JSON, has no `uuid` field, or
/// the field is not a string.
fn extract_uuid_from_payload(payload: &[u8]) -> Option<String> {
    match serde_json::from_slice::<serde_json::Value>(payload) {
        Ok(doc) => doc
            .get("uuid")
            .and_then(|uuid| uuid.as_str())
            .map(str::to_owned),
        Err(_) => None,
    }
}
fn make_missing_field_error() -> serde_json::Error {
serde_json::from_str::<serde_json::Value>("").unwrap_err()
}
// Unit tests for the adapter. Most of them read fixture transcripts stored
// under `tests/fixtures/claude` in the crate directory.
#[cfg(test)]
mod tests {
use super::*;
use crate::adapters::Adapter;
// Absolute path of the fixture file `name`.
fn fixture_path(name: &str) -> PathBuf {
PathBuf::from(concat!(
env!("CARGO_MANIFEST_DIR"),
"/tests/fixtures/claude"
))
.join(name)
}
// Directory holding all fixtures; used as the adapter's projects root so the
// containment check in `read_new_records` accepts fixture files.
fn fixture_root() -> PathBuf {
PathBuf::from(concat!(
env!("CARGO_MANIFEST_DIR"),
"/tests/fixtures/claude"
))
}
// Adapter rooted at the fixture directory.
fn adapter() -> ClaudeAdapter {
ClaudeAdapter::with_projects_root(fixture_root())
}
// Cursor positioned at the start of fixture `name`.
fn cursor_for(name: &str) -> ClaudeCursor {
ClaudeCursor {
file_path: fixture_path(name),
byte_offset: 0,
last_uuid: None,
}
}
// A plain three-turn conversation becomes three rows with the expected roles,
// session id and tool name.
#[test]
fn parses_simple_conversation() {
let a = adapter();
let (records, _cursor) = a
.read_new_records(&cursor_for("1-simple-conversation.jsonl"))
.unwrap();
let rows = a.parse(records).unwrap();
assert_eq!(rows.len(), 3, "expected 3 LedgerRows");
assert_eq!(rows[0].role, "user");
assert_eq!(rows[1].role, "assistant");
assert_eq!(rows[2].role, "user");
for row in &rows {
assert_eq!(row.session_id, "session-simple-001");
assert_eq!(row.tool, "claude");
}
}
// `parentUuid` is carried over verbatim: the first row has no parent and every
// later row's parent matches the raw fixture line at the same index.
#[test]
fn preserves_parent_chain() {
let a = adapter();
let (records, _) = a
.read_new_records(&cursor_for("2-parentuuid-chain-deep.jsonl"))
.unwrap();
let rows = a.parse(records).unwrap();
assert!(
rows.len() >= 3,
"need at least 3 rows to verify chain depth >=2"
);
assert!(
rows[0].parent_id.is_none(),
"root row must have no parent_id"
);
for (i, row) in rows.iter().enumerate().skip(1) {
assert!(row.parent_id.is_some(), "row {} must have a parent_id", i);
}
// Re-read the fixture independently of the adapter to get expected values.
// Indexing `values[i]` alongside `rows[i]` relies on no line of this fixture
// being filtered out by `parse`.
let raw_lines =
std::fs::read_to_string(fixture_path("2-parentuuid-chain-deep.jsonl")).unwrap();
let values: Vec<serde_json::Value> = raw_lines
.lines()
.map(|l| serde_json::from_str(l).unwrap())
.collect();
for (i, row) in rows.iter().enumerate().skip(1) {
let expected_parent = values[i]
.get("parentUuid")
.and_then(|p| p.as_str())
.unwrap();
assert_eq!(
row.parent_id.as_deref(),
Some(expected_parent),
"row {} parent_id mismatch",
i
);
}
}
// A row containing a `tool_use` item exposes both the tool call JSON and the
// path it names.
#[test]
fn extracts_tool_use_and_result() {
let a = adapter();
let (records, _) = a
.read_new_records(&cursor_for("3-tool-use-and-result.jsonl"))
.unwrap();
let rows = a.parse(records).unwrap();
let tool_use_row = rows
.iter()
.find(|r| r.tool_calls_json.is_some())
.expect("at least one row must have tool_calls_json");
assert!(tool_use_row.tool_calls_json.is_some());
assert!(
tool_use_row.files_touched_json.is_some(),
"tool_use row with path must have files_touched_json"
);
let files = tool_use_row.files_touched_json.as_ref().unwrap();
assert!(
files.contains("/synthetic/path/1/main.rs"),
"expected path in files_touched_json, got: {}",
files
);
}
// Of the fixture's 9 raw lines, only the 5 conversation lines survive `parse`.
#[test]
fn filters_non_conversation_types() {
let a = adapter();
let (records, _) = a
.read_new_records(&cursor_for("4-non-conversation-types.jsonl"))
.unwrap();
let total = records.len();
let rows = a.parse(records).unwrap();
assert_eq!(total, 9, "expected 9 raw records");
assert_eq!(
rows.len(),
5,
"expected 5 conversation rows after filtering"
);
for row in &rows {
assert!(
row.role == "user" || row.role == "assistant",
"unexpected role: {}",
row.role
);
}
}
// A fixture ending in an unterminated line: complete lines are returned, the
// cursor stops right after the last newline, and a second read is a no-op.
#[test]
fn partial_line_tail_returns_complete_lines_and_stops_cursor() {
let a = adapter();
let cursor = cursor_for("5-partial-line-tail.jsonl");
let (records, advanced) = a.read_new_records(&cursor).unwrap();
assert!(
!records.is_empty(),
"should return the complete lines before partial tail"
);
// 798 is the byte offset just past the last newline in this fixture.
assert_eq!(
advanced.byte_offset, 798,
"cursor must stop at last complete newline (798), got {}",
advanced.byte_offset
);
let (records2, advanced2) = a.read_new_records(&advanced).unwrap();
assert!(records2.is_empty(), "no new complete lines on second read");
assert_eq!(advanced2.byte_offset, advanced.byte_offset, "cursor stable");
}
// Re-reading with the returned cursor yields nothing new and never moves the
// offset backwards.
#[test]
fn cursor_monotonic_across_reads() {
let a = adapter();
let cursor0 = cursor_for("1-simple-conversation.jsonl");
let (records1, cursor1) = a.read_new_records(&cursor0).unwrap();
assert!(!records1.is_empty(), "first read should return records");
assert!(
cursor1.byte_offset >= cursor0.byte_offset,
"cursor must not regress"
);
let (records2, cursor2) = a.read_new_records(&cursor1).unwrap();
assert!(records2.is_empty(), "no new records on second read");
assert_eq!(
cursor2.byte_offset, cursor1.byte_offset,
"cursor must be stable"
);
}
// `detect` reports the configured root when the directory exists.
#[test]
fn detect_returns_root_when_present() {
let dir = tempfile::tempdir().unwrap();
let a = ClaudeAdapter::with_projects_root(dir.path().to_path_buf());
let result = a.detect().unwrap();
assert!(result.is_some(), "detect must return Some when dir exists");
assert_eq!(result.unwrap(), dir.path());
}
// `detect` reports `None` (not an error) when the directory does not exist.
#[test]
fn detect_returns_none_when_absent() {
let a = ClaudeAdapter::with_projects_root(PathBuf::from(
"/tmp/carryover-test-nonexistent-dir-xyz-987654",
));
let result = a.detect().unwrap();
assert!(
result.is_none(),
"detect must return None when dir is absent"
);
}
}