Skip to main content

cai_ingest/
claude.rs

1//! Claude Code conversation parser
2
3use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use tracing::debug;
11
12/// Claude Code conversation file format
13#[derive(Debug, Serialize, Deserialize)]
14struct ClaudeConversation {
15    /// Conversation ID (filename)
16    #[serde(skip)]
17    id: String,
18    /// Messages in the conversation
19    messages: Vec<ClaudeMessage>,
20    /// Optional metadata
21    #[serde(default)]
22    metadata: ClaudeMetadata,
23}
24
25#[derive(Debug, Serialize, Deserialize)]
26struct ClaudeMessage {
27    /// Role (user/assistant)
28    role: String,
29    /// Message content
30    content: String,
31    /// Timestamp (ISO 8601)
32    #[serde(default)]
33    timestamp: Option<String>,
34}
35
36#[derive(Debug, Serialize, Deserialize, Default)]
37struct ClaudeMetadata {
38    /// Project path
39    #[serde(rename = "projectPath")]
40    project_path: Option<String>,
41    /// Repository URL
42    repo_url: Option<String>,
43}
44
/// Parser for Claude Code conversations.
///
/// Holds the directory that is scanned for `.json` conversation files.
#[derive(Debug, Clone)]
pub struct ClaudeParser {
    /// Path to the conversations directory.
    conversations_dir: PathBuf,
}
50
51impl ClaudeParser {
52    /// Create a new Claude parser
53    ///
54    /// # Arguments
55    /// * `conversations_dir` - Path to ~/.claude/conversations directory
56    pub fn new<P: AsRef<Path>>(conversations_dir: P) -> Self {
57        Self {
58            conversations_dir: conversations_dir.as_ref().to_path_buf(),
59        }
60    }
61
62    /// Create parser with default path
63    pub fn with_default_path() -> Result<Self, IngestError> {
64        let home = dirs::home_dir()
65            .ok_or_else(|| IngestError::PathNotFound("Home directory not found".to_string()))?;
66        Ok(Self::new(home.join(".claude/conversations")))
67    }
68
69    /// Parse all conversations from the directory
70    pub fn parse_all(&self) -> Result<Vec<Entry>, IngestError> {
71        let entries = fs::read_dir(&self.conversations_dir).map_err(|e| {
72            IngestError::PathNotFound(format!("{}: {}", self.conversations_dir.display(), e))
73        })?;
74
75        let mut results = Vec::new();
76
77        for entry in entries {
78            let entry =
79                entry.map_err(|e| IngestError::PermissionDenied(format!("read dir: {}", e)))?;
80            let path = entry.path();
81
82            if path.extension().and_then(|s| s.to_str()) != Some("json") {
83                continue;
84            }
85
86            debug!("Parsing Claude conversation: {}", path.display());
87            match self.parse_file(&path) {
88                Ok(conversation_entries) => {
89                    results.extend(conversation_entries);
90                }
91                Err(e) => {
92                    tracing::warn!("Failed to parse {}: {}", path.display(), e);
93                }
94            }
95        }
96
97        if results.is_empty() {
98            return Err(IngestError::NoFilesFound(
99                self.conversations_dir.display().to_string(),
100            ));
101        }
102
103        Ok(results)
104    }
105
106    /// Parse a single conversation file
107    fn parse_file(&self, path: &Path) -> Result<Vec<Entry>, IngestError> {
108        let content = fs::read_to_string(path)
109            .map_err(|e| IngestError::InvalidFormat(format!("read failed: {}", e)))?;
110
111        let conversation_id = path
112            .file_stem()
113            .and_then(|s| s.to_str())
114            .unwrap_or("unknown")
115            .to_string();
116
117        // Try parsing as direct Entry array first (newer format)
118        if let Ok(entries) = serde_json::from_str::<Vec<ClaudeMessage>>(&content) {
119            return self.messages_to_entries(&conversation_id, entries, &None);
120        }
121
122        // Try parsing as Conversation object (older format)
123        let mut conv: ClaudeConversation = serde_json::from_str(&content)
124            .map_err(|e| IngestError::InvalidFormat(format!("JSON parse: {}", e)))?;
125        conv.id = conversation_id;
126
127        self.messages_to_entries(&conv.id, conv.messages, &Some(conv.metadata))
128    }
129
130    fn messages_to_entries(
131        &self,
132        conversation_id: &str,
133        messages: Vec<ClaudeMessage>,
134        metadata: &Option<ClaudeMetadata>,
135    ) -> Result<Vec<Entry>, IngestError> {
136        let mut entries = Vec::new();
137        let mut i = 0;
138
139        // Pair user messages with assistant responses
140        while i < messages.len() {
141            let msg = &messages[i];
142
143            if msg.role == "user" {
144                let prompt = msg.content.clone();
145                let timestamp = parse_timestamp(&msg.timestamp);
146
147                // Look for assistant response
148                let response = if i + 1 < messages.len() && messages[i + 1].role == "assistant" {
149                    messages[i + 1].content.clone()
150                } else {
151                    String::new()
152                };
153
154                let meta = metadata
155                    .as_ref()
156                    .map(|m| Metadata {
157                        file_path: Some(m.project_path.clone().unwrap_or_default()),
158                        repo_url: m.repo_url.clone(),
159                        commit_hash: None,
160                        language: None,
161                        extra: HashMap::from([
162                            ("conversation_id".to_string(), conversation_id.to_string()),
163                            ("message_index".to_string(), i.to_string()),
164                        ]),
165                    })
166                    .unwrap_or_else(|| Metadata {
167                        file_path: None,
168                        repo_url: None,
169                        commit_hash: None,
170                        language: None,
171                        extra: HashMap::from([(
172                            "conversation_id".to_string(),
173                            conversation_id.to_string(),
174                        )]),
175                    });
176
177                entries.push(Entry {
178                    id: format!("claude-{}-{}", conversation_id, i),
179                    source: Source::Claude,
180                    timestamp,
181                    prompt,
182                    response,
183                    metadata: meta,
184                });
185            }
186
187            i += 1;
188        }
189
190        Ok(entries)
191    }
192}
193
194fn parse_timestamp(ts: &Option<String>) -> DateTime<Utc> {
195    ts.as_ref()
196        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
197        .map(|dt: DateTime<chrono::FixedOffset>| dt.with_timezone(&Utc))
198        .unwrap_or_else(Utc::now)
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `json` to `file_name` inside a fresh temp directory and run
    /// `parse_all` over that directory.
    fn parse_single_file(file_name: &str, json: &str) -> Vec<Entry> {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join(file_name), json).unwrap();
        ClaudeParser::new(dir.path()).parse_all().unwrap()
    }

    /// Object layout: one user/assistant pair plus a `projectPath` in metadata.
    #[test]
    fn test_parse_claude_conversation() {
        let json = r#"{
            "messages": [
                {
                    "role": "user",
                    "content": "help me write a function",
                    "timestamp": "2024-01-15T10:30:00Z"
                },
                {
                    "role": "assistant",
                    "content": "Here's how to write a function...",
                    "timestamp": "2024-01-15T10:30:01Z"
                }
            ],
            "metadata": {
                "projectPath": "/Users/user/project"
            }
        }"#;

        let parsed = parse_single_file("test-conversation.json", json);

        assert_eq!(parsed.len(), 1);
        let first = parsed.first().unwrap();
        assert!(first.id.starts_with("claude-test-conversation-"));
        assert_eq!(first.source, Source::Claude);
        assert_eq!(first.prompt, "help me write a function");
        assert_eq!(first.response, "Here's how to write a function...");
        assert_eq!(
            first.metadata.file_path.as_deref(),
            Some("/Users/user/project")
        );
    }

    /// Array layout: a bare list of messages without timestamps or metadata.
    #[test]
    fn test_parse_conversation_array_format() {
        let json = r#"[
            {
                "role": "user",
                "content": "test question"
            },
            {
                "role": "assistant",
                "content": "test answer"
            }
        ]"#;

        let parsed = parse_single_file("array-format.json", json);

        assert_eq!(parsed.len(), 1);
        let first = parsed.first().unwrap();
        assert_eq!(first.prompt, "test question");
        assert_eq!(first.response, "test answer");
    }
}
271}