1use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use tracing::debug;
11
12#[derive(Debug, Serialize, Deserialize)]
14struct ClaudeConversation {
15 #[serde(skip)]
17 id: String,
18 messages: Vec<ClaudeMessage>,
20 #[serde(default)]
22 metadata: ClaudeMetadata,
23}
24
25#[derive(Debug, Serialize, Deserialize)]
26struct ClaudeMessage {
27 role: String,
29 content: String,
31 #[serde(default)]
33 timestamp: Option<String>,
34}
35
36#[derive(Debug, Serialize, Deserialize, Default)]
37struct ClaudeMetadata {
38 #[serde(rename = "projectPath")]
40 project_path: Option<String>,
41 repo_url: Option<String>,
43}
44
/// Reads Claude conversation files (`*.json`) from a directory and converts
/// them into `Entry` values.
///
/// Build one with `ClaudeParser::new` for an explicit directory or
/// `ClaudeParser::with_default_path` for the per-user default location.
#[derive(Debug, Clone)]
pub struct ClaudeParser {
    /// Directory that is scanned for conversation files.
    conversations_dir: PathBuf,
}
50
51impl ClaudeParser {
52 pub fn new<P: AsRef<Path>>(conversations_dir: P) -> Self {
57 Self {
58 conversations_dir: conversations_dir.as_ref().to_path_buf(),
59 }
60 }
61
62 pub fn with_default_path() -> Result<Self, IngestError> {
64 let home = dirs::home_dir()
65 .ok_or_else(|| IngestError::PathNotFound("Home directory not found".to_string()))?;
66 Ok(Self::new(home.join(".claude/conversations")))
67 }
68
69 pub fn parse_all(&self) -> Result<Vec<Entry>, IngestError> {
71 let entries = fs::read_dir(&self.conversations_dir).map_err(|e| {
72 IngestError::PathNotFound(format!("{}: {}", self.conversations_dir.display(), e))
73 })?;
74
75 let mut results = Vec::new();
76
77 for entry in entries {
78 let entry =
79 entry.map_err(|e| IngestError::PermissionDenied(format!("read dir: {}", e)))?;
80 let path = entry.path();
81
82 if path.extension().and_then(|s| s.to_str()) != Some("json") {
83 continue;
84 }
85
86 debug!("Parsing Claude conversation: {}", path.display());
87 match self.parse_file(&path) {
88 Ok(conversation_entries) => {
89 results.extend(conversation_entries);
90 }
91 Err(e) => {
92 tracing::warn!("Failed to parse {}: {}", path.display(), e);
93 }
94 }
95 }
96
97 if results.is_empty() {
98 return Err(IngestError::NoFilesFound(
99 self.conversations_dir.display().to_string(),
100 ));
101 }
102
103 Ok(results)
104 }
105
106 fn parse_file(&self, path: &Path) -> Result<Vec<Entry>, IngestError> {
108 let content = fs::read_to_string(path)
109 .map_err(|e| IngestError::InvalidFormat(format!("read failed: {}", e)))?;
110
111 let conversation_id = path
112 .file_stem()
113 .and_then(|s| s.to_str())
114 .unwrap_or("unknown")
115 .to_string();
116
117 if let Ok(entries) = serde_json::from_str::<Vec<ClaudeMessage>>(&content) {
119 return self.messages_to_entries(&conversation_id, entries, &None);
120 }
121
122 let mut conv: ClaudeConversation = serde_json::from_str(&content)
124 .map_err(|e| IngestError::InvalidFormat(format!("JSON parse: {}", e)))?;
125 conv.id = conversation_id;
126
127 self.messages_to_entries(&conv.id, conv.messages, &Some(conv.metadata))
128 }
129
130 fn messages_to_entries(
131 &self,
132 conversation_id: &str,
133 messages: Vec<ClaudeMessage>,
134 metadata: &Option<ClaudeMetadata>,
135 ) -> Result<Vec<Entry>, IngestError> {
136 let mut entries = Vec::new();
137 let mut i = 0;
138
139 while i < messages.len() {
141 let msg = &messages[i];
142
143 if msg.role == "user" {
144 let prompt = msg.content.clone();
145 let timestamp = parse_timestamp(&msg.timestamp);
146
147 let response = if i + 1 < messages.len() && messages[i + 1].role == "assistant" {
149 messages[i + 1].content.clone()
150 } else {
151 String::new()
152 };
153
154 let meta = metadata
155 .as_ref()
156 .map(|m| Metadata {
157 file_path: Some(m.project_path.clone().unwrap_or_default()),
158 repo_url: m.repo_url.clone(),
159 commit_hash: None,
160 language: None,
161 extra: HashMap::from([
162 ("conversation_id".to_string(), conversation_id.to_string()),
163 ("message_index".to_string(), i.to_string()),
164 ]),
165 })
166 .unwrap_or_else(|| Metadata {
167 file_path: None,
168 repo_url: None,
169 commit_hash: None,
170 language: None,
171 extra: HashMap::from([(
172 "conversation_id".to_string(),
173 conversation_id.to_string(),
174 )]),
175 });
176
177 entries.push(Entry {
178 id: format!("claude-{}-{}", conversation_id, i),
179 source: Source::Claude,
180 timestamp,
181 prompt,
182 response,
183 metadata: meta,
184 });
185 }
186
187 i += 1;
188 }
189
190 Ok(entries)
191 }
192}
193
194fn parse_timestamp(ts: &Option<String>) -> DateTime<Utc> {
195 ts.as_ref()
196 .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
197 .map(|dt: DateTime<chrono::FixedOffset>| dt.with_timezone(&Utc))
198 .unwrap_or_else(Utc::now)
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes `json` as the only file of a fresh temporary directory and
    /// returns what `ClaudeParser::parse_all` extracts from that directory.
    fn parse_single_file(file_name: &str, json: &str) -> Vec<Entry> {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join(file_name), json).unwrap();
        ClaudeParser::new(temp_dir.path()).parse_all().unwrap()
    }

    /// Object layout: messages plus a metadata section.
    #[test]
    fn test_parse_claude_conversation() {
        let entries = parse_single_file(
            "test-conversation.json",
            r#"{
                "messages": [
                    {
                        "role": "user",
                        "content": "help me write a function",
                        "timestamp": "2024-01-15T10:30:00Z"
                    },
                    {
                        "role": "assistant",
                        "content": "Here's how to write a function...",
                        "timestamp": "2024-01-15T10:30:01Z"
                    }
                ],
                "metadata": {
                    "projectPath": "/Users/user/project"
                }
            }"#,
        );

        assert_eq!(entries.len(), 1);

        let first = &entries[0];
        assert!(first.id.starts_with("claude-test-conversation-"));
        assert_eq!(first.source, Source::Claude);
        assert_eq!(first.prompt, "help me write a function");
        assert_eq!(first.response, "Here's how to write a function...");
        assert_eq!(
            first.metadata.file_path,
            Some("/Users/user/project".to_string())
        );
    }

    /// Array layout: a bare list of messages without metadata.
    #[test]
    fn test_parse_conversation_array_format() {
        let entries = parse_single_file(
            "array-format.json",
            r#"[
                {
                    "role": "user",
                    "content": "test question"
                },
                {
                    "role": "assistant",
                    "content": "test answer"
                }
            ]"#,
        );

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].prompt, "test question");
        assert_eq!(entries[0].response, "test answer");
    }
}