1use super::common::{
17 build_http_client, CloudConversation, CloudMessage, CloudProvider, FetchOptions,
18 HttpClientConfig,
19};
20use anyhow::{anyhow, Result};
21use chrono::{DateTime, Utc};
22use serde::{Deserialize, Deserializer};
23
/// Base URL that the conversation-list and conversation-detail request paths
/// in this file are appended to.
const CHATGPT_API_BASE: &str = "https://chatgpt.com/backend-api";
25
26fn deserialize_timestamp<'de, D>(deserializer: D) -> std::result::Result<f64, D::Error>
28where
29 D: Deserializer<'de>,
30{
31 use serde::de::Error;
32
33 #[derive(Deserialize)]
34 #[serde(untagged)]
35 enum TimestampFormat {
36 Float(f64),
37 String(String),
38 }
39
40 match TimestampFormat::deserialize(deserializer)? {
41 TimestampFormat::Float(f) => Ok(f),
42 TimestampFormat::String(s) => {
43 if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
45 Ok(dt.timestamp() as f64)
46 } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
47 Ok(dt.timestamp() as f64)
48 } else {
49 Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
50 }
51 }
52 }
53}
54
55fn deserialize_optional_timestamp<'de, D>(
57 deserializer: D,
58) -> std::result::Result<Option<f64>, D::Error>
59where
60 D: Deserializer<'de>,
61{
62 use serde::de::Error;
63
64 #[derive(Deserialize)]
65 #[serde(untagged)]
66 enum TimestampFormat {
67 Float(f64),
68 String(String),
69 Null,
70 }
71
72 match Option::<TimestampFormat>::deserialize(deserializer)? {
73 None => Ok(None),
74 Some(TimestampFormat::Null) => Ok(None),
75 Some(TimestampFormat::Float(f)) => Ok(Some(f)),
76 Some(TimestampFormat::String(s)) => {
77 if s.is_empty() {
78 return Ok(None);
79 }
80 if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
82 Ok(Some(dt.timestamp() as f64))
83 } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
84 Ok(Some(dt.timestamp() as f64))
85 } else {
86 Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
87 }
88 }
89 }
90}
91
/// Provider state for talking to the ChatGPT backend: the credentials that can
/// be turned into an `Authorization` header, plus a lazily built HTTP client.
pub struct ChatGPTProvider {
    /// API key; used as the bearer credential only when neither an access
    /// token nor a session token is available.
    api_key: Option<String>,
    /// Value sent as the `__Secure-next-auth.session-token` cookie when
    /// requesting an access token.
    session_token: Option<String>,
    /// Access token cached by `get_access_token` after a successful session lookup.
    access_token: Option<String>,
    /// HTTP client, created on first use by `ensure_client`.
    client: Option<reqwest::blocking::Client>,
}
99
100impl ChatGPTProvider {
101 pub fn new(api_key: Option<String>) -> Self {
102 Self {
103 api_key,
104 session_token: None,
105 access_token: None,
106 client: None,
107 }
108 }
109
110 pub fn with_session_token(session_token: String) -> Self {
112 Self {
113 api_key: None,
114 session_token: Some(session_token),
115 access_token: None,
116 client: None,
117 }
118 }
119
120 fn ensure_client(&mut self) -> Result<&reqwest::blocking::Client> {
121 if self.client.is_none() {
122 let config = HttpClientConfig {
123 user_agent: "".to_string(),
124 ..Default::default()
125 };
126 self.client = Some(build_http_client(&config)?);
127 }
128 Ok(self.client.as_ref().unwrap())
129 }
130
131 fn get_access_token(&mut self) -> Result<String> {
133 if let Some(ref token) = self.access_token {
134 return Ok(token.clone());
135 }
136
137 let session_token = self
138 .session_token
139 .clone()
140 .ok_or_else(|| anyhow!("No session token available"))?;
141
142 let client = self.ensure_client()?;
143
144 let response = client
146 .get("https://chatgpt.com/api/auth/session")
147 .header(
148 "Cookie",
149 format!("__Secure-next-auth.session-token={}", session_token),
150 )
151 .header("Accept", "application/json")
152 .send()
153 .map_err(|e| anyhow!("Failed to get access token: {}", e))?;
154
155 if !response.status().is_success() {
156 let status = response.status();
157 let body = response.text().unwrap_or_default();
158 return Err(anyhow!(
159 "Session endpoint returned {}: {}. Authentication may have expired.",
160 status,
161 body
162 ));
163 }
164
165 let session_data: serde_json::Value = response
166 .json()
167 .map_err(|e| anyhow!("Failed to parse session response: {}", e))?;
168
169 let access_token = session_data
170 .get("accessToken")
171 .and_then(|v| v.as_str())
172 .ok_or_else(|| {
173 anyhow!("No access token in session response - authentication may have expired")
174 })?
175 .to_string();
176
177 self.access_token = Some(access_token.clone());
178 Ok(access_token)
179 }
180
181 fn get_auth_header(&mut self) -> Result<String> {
183 if let Some(ref token) = self.access_token {
184 return Ok(format!("Bearer {}", token));
185 }
186 if self.session_token.is_some() {
187 let token = self.get_access_token()?;
188 return Ok(format!("Bearer {}", token));
189 }
190 if let Some(ref key) = self.api_key {
191 return Ok(format!("Bearer {}", key));
192 }
193 Err(anyhow!("No authentication credentials available"))
194 }
195}
196
/// Deserialized body of the conversation-list request issued by
/// `list_conversations`.
#[derive(Debug, Deserialize)]
struct ConversationListResponse {
    /// Conversation summaries; the only field read by the visible code.
    items: Vec<ConversationItem>,
    // The remaining fields fall back to their type's default when absent and
    // are not read anywhere in the visible code.
    #[serde(default)]
    limit: i32,
    #[serde(default)]
    offset: i32,
    #[serde(default)]
    total: i32,
    #[serde(default)]
    has_missing_conversations: bool,
}
209
/// One entry of `ConversationListResponse::items`.
#[derive(Debug, Deserialize)]
struct ConversationItem {
    /// Conversation identifier, copied into `CloudConversation::id`.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time; accepts a number or a date-time string
    /// (see `deserialize_timestamp`).
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time; missing, `null`, or empty-string values become `None`.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Archived flag; defaults to `false` when absent. Archived items are
    /// skipped unless `FetchOptions::include_archived` is set.
    #[serde(default)]
    is_archived: bool,
}
221
/// Deserialized body of the single-conversation request issued by
/// `fetch_conversation`.
#[derive(Debug, Deserialize)]
struct ConversationDetailResponse {
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time; accepts a number or a date-time string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time; missing, `null`, or empty-string values become `None`.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Message nodes keyed by node id.
    mapping: std::collections::HashMap<String, MessageNode>,
    /// Deserialized but not read by the visible code.
    #[serde(default)]
    current_node: Option<String>,
    /// Deserialized but not read by the visible code.
    #[serde(default)]
    conversation_id: Option<String>,
    /// Model description; its `slug` is copied onto the conversation and its messages.
    #[serde(default)]
    model: Option<ModelInfo>,
}
237
/// One value of `ConversationDetailResponse::mapping`.
#[derive(Debug, Deserialize)]
struct MessageNode {
    /// Node identifier (not read by the visible code, which uses the map key).
    id: String,
    /// Parent node id; deserialized but not read by the visible code.
    #[serde(default)]
    parent: Option<String>,
    /// Child node ids; deserialized but not read by the visible code.
    #[serde(default)]
    children: Vec<String>,
    /// Message payload; nodes without one are skipped by `fetch_conversation`.
    message: Option<MessageContent>,
}
247
/// Message payload attached to a `MessageNode`.
#[derive(Debug, Deserialize)]
struct MessageContent {
    /// Message identifier, copied into `CloudMessage::id`.
    id: String,
    /// Author information; its `role` decides whether the message is kept.
    author: AuthorInfo,
    /// Creation time; missing, `null`, or empty-string values become `None`.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    create_time: Option<f64>,
    /// Message body.
    content: ContentParts,
    /// Arbitrary JSON metadata; deserialized but not read by the visible code.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
258
/// Author of a message.
#[derive(Debug, Deserialize)]
struct AuthorInfo {
    /// Role string; `fetch_conversation` skips messages whose role is
    /// `"system"` or `"tool"`.
    role: String,
    /// Deserialized but not read by the visible code.
    #[serde(default)]
    name: Option<String>,
    /// Arbitrary JSON metadata; deserialized but not read by the visible code.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
267
/// Body of a message.
#[derive(Debug, Deserialize)]
struct ContentParts {
    /// Content type label; required, but not read by the visible code.
    content_type: String,
    /// Content parts as raw JSON values. When present, only the string
    /// entries are used and they are joined with newlines.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain-text body, used only when `parts` is absent.
    #[serde(default)]
    text: Option<String>,
}
276
/// Model description attached to a conversation detail response.
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model slug, copied into the `model` field of the conversation and its messages.
    slug: Option<String>,
    /// Deserialized but not read by the visible code.
    max_tokens: Option<i32>,
    /// Deserialized but not read by the visible code.
    title: Option<String>,
}
283
284impl CloudProvider for ChatGPTProvider {
285 fn name(&self) -> &'static str {
286 "ChatGPT"
287 }
288
289 fn api_base_url(&self) -> &str {
290 CHATGPT_API_BASE
291 }
292
293 fn is_authenticated(&self) -> bool {
294 self.api_key.is_some() || self.session_token.is_some() || self.access_token.is_some()
295 }
296
297 fn set_credentials(&mut self, api_key: Option<String>, session_token: Option<String>) {
298 self.api_key = api_key;
299 self.session_token = session_token;
300 self.access_token = None; }
302
303 fn list_conversations(&self, options: &FetchOptions) -> Result<Vec<CloudConversation>> {
304 let mut provider = ChatGPTProvider {
307 api_key: self.api_key.clone(),
308 session_token: self.session_token.clone(),
309 access_token: self.access_token.clone(),
310 client: None,
311 };
312
313 if !provider.is_authenticated() {
314 return Err(anyhow!(
315 "ChatGPT requires authentication. Provide a session token from browser cookies.\n\
316 Run 'chasm harvest scan --web' to check browser authentication status."
317 ));
318 }
319
320 let auth_header = provider.get_auth_header()?;
322 let client = provider.ensure_client()?;
323
324 let limit = options.limit.unwrap_or(50).min(100);
325 let url = format!(
326 "{}/conversations?offset=0&limit={}&order=updated",
327 CHATGPT_API_BASE, limit
328 );
329
330 let response = client
331 .get(&url)
332 .header("Authorization", &auth_header)
333 .header("Accept", "application/json")
334 .header("Content-Type", "application/json")
335 .send()
336 .map_err(|e| anyhow!("Failed to fetch conversations: {}", e))?;
337
338 if !response.status().is_success() {
339 let status = response.status();
340 let body = response.text().unwrap_or_default();
341 return Err(anyhow!(
342 "ChatGPT API returned {}: {}. Session may have expired - log in to chatgpt.com in your browser.",
343 status,
344 body
345 ));
346 }
347
348 let list_response: ConversationListResponse = response
349 .json()
350 .map_err(|e| anyhow!("Failed to parse conversation list: {}", e))?;
351
352 let mut conversations = Vec::new();
355 for item in list_response.items {
356 if item.is_archived && !options.include_archived {
358 continue;
359 }
360
361 let created = timestamp_to_datetime(item.create_time);
363 if let Some(after) = options.after {
364 if created < after {
365 continue;
366 }
367 }
368 if let Some(before) = options.before {
369 if created > before {
370 continue;
371 }
372 }
373
374 conversations.push(CloudConversation {
375 id: item.id,
376 title: item.title,
377 created_at: created,
378 updated_at: item.update_time.map(timestamp_to_datetime),
379 model: None,
380 messages: Vec::new(), metadata: None,
382 });
383 }
384
385 Ok(conversations)
386 }
387
388 fn fetch_conversation(&self, id: &str) -> Result<CloudConversation> {
389 let mut provider = ChatGPTProvider {
390 api_key: self.api_key.clone(),
391 session_token: self.session_token.clone(),
392 access_token: self.access_token.clone(),
393 client: None,
394 };
395
396 if !provider.is_authenticated() {
397 return Err(anyhow!("ChatGPT requires authentication"));
398 }
399
400 let auth_header = provider.get_auth_header()?;
401 let client = provider.ensure_client()?;
402
403 let url = format!("{}/conversation/{}", CHATGPT_API_BASE, id);
404
405 let response = client
406 .get(&url)
407 .header("Authorization", &auth_header)
408 .header("Accept", "application/json")
409 .send()
410 .map_err(|e| anyhow!("Failed to fetch conversation {}: {}", id, e))?;
411
412 if !response.status().is_success() {
413 let status = response.status();
414 return Err(anyhow!(
415 "Failed to fetch conversation {}: HTTP {}",
416 id,
417 status
418 ));
419 }
420
421 let detail: ConversationDetailResponse = response
422 .json()
423 .map_err(|e| anyhow!("Failed to parse conversation {}: {}", id, e))?;
424
425 let mut message_order: Vec<(String, CloudMessage)> = Vec::new();
428
429 for (node_id, node) in &detail.mapping {
430 if let Some(ref msg_content) = node.message {
431 let role = &msg_content.author.role;
432
433 if role == "system" || role == "tool" {
435 continue;
436 }
437
438 let content = msg_content
439 .content
440 .parts
441 .as_ref()
442 .map(|parts| {
443 parts
444 .iter()
445 .filter_map(|p| p.as_str().map(String::from))
446 .collect::<Vec<_>>()
447 .join("\n")
448 })
449 .or_else(|| msg_content.content.text.clone())
450 .unwrap_or_default();
451
452 if content.is_empty() {
453 continue;
454 }
455
456 let cloud_message = CloudMessage {
457 id: Some(msg_content.id.clone()),
458 role: role.clone(),
459 content,
460 timestamp: msg_content.create_time.map(timestamp_to_datetime),
461 model: detail.model.as_ref().and_then(|m| m.slug.clone()),
462 };
463
464 message_order.push((node_id.clone(), cloud_message));
465 }
466 }
467
468 message_order.sort_by(|a, b| {
470 let ts_a = a.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
471 let ts_b = b.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
472 ts_a.cmp(&ts_b)
473 });
474
475 let messages: Vec<CloudMessage> = message_order.into_iter().map(|(_, msg)| msg).collect();
476
477 Ok(CloudConversation {
478 id: id.to_string(),
479 title: detail.title,
480 created_at: timestamp_to_datetime(detail.create_time),
481 updated_at: detail.update_time.map(timestamp_to_datetime),
482 model: detail.model.and_then(|m| m.slug),
483 messages,
484 metadata: None,
485 })
486 }
487
488 fn api_key_env_var(&self) -> &'static str {
489 "OPENAI_API_KEY"
490 }
491}
492
493pub fn parse_chatgpt_export(json_data: &str) -> Result<Vec<CloudConversation>> {
495 let conversations: Vec<ChatGPTExportConversation> = serde_json::from_str(json_data)?;
496
497 Ok(conversations
498 .into_iter()
499 .map(|conv| CloudConversation {
500 id: conv.id,
501 title: conv.title,
502 created_at: timestamp_to_datetime(conv.create_time),
503 updated_at: conv.update_time.map(timestamp_to_datetime),
504 model: None,
505 messages: conv
506 .mapping
507 .into_iter()
508 .filter_map(|(_, node)| {
509 node.message.map(|msg| {
510 let content = msg
511 .content
512 .parts
513 .map(|parts| {
514 parts
515 .into_iter()
516 .filter_map(|p| p.as_str().map(String::from))
517 .collect::<Vec<_>>()
518 .join("\n")
519 })
520 .or(msg.content.text)
521 .unwrap_or_default();
522
523 CloudMessage {
524 id: Some(msg.id),
525 role: msg.author.role,
526 content,
527 timestamp: msg.create_time.map(timestamp_to_datetime),
528 model: None,
529 }
530 })
531 })
532 .filter(|m| !m.content.is_empty() && m.role != "system")
533 .collect(),
534 metadata: None,
535 })
536 .collect())
537}
538
/// One conversation of an export document parsed by `parse_chatgpt_export`.
#[derive(Debug, Deserialize)]
struct ChatGPTExportConversation {
    /// Conversation identifier.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time as a number; converted with `timestamp_to_datetime`.
    create_time: f64,
    /// Last-update time as a number, if present.
    update_time: Option<f64>,
    /// Message nodes keyed by string (the keys are ignored by the parser).
    mapping: std::collections::HashMap<String, ChatGPTExportNode>,
}
547
/// One value of `ChatGPTExportConversation::mapping`.
#[derive(Debug, Deserialize)]
struct ChatGPTExportNode {
    /// Message payload; nodes without one contribute nothing to the output.
    message: Option<ChatGPTExportMessage>,
}
552
/// Message payload of an export node.
#[derive(Debug, Deserialize)]
struct ChatGPTExportMessage {
    /// Message identifier, copied into `CloudMessage::id`.
    id: String,
    /// Author of the message.
    author: ChatGPTExportAuthor,
    /// Creation time as a number, if present.
    create_time: Option<f64>,
    /// Message body.
    content: ChatGPTExportContent,
}
560
/// Author of an exported message.
#[derive(Debug, Deserialize)]
struct ChatGPTExportAuthor {
    /// Role string; `parse_chatgpt_export` drops messages whose role is `"system"`.
    role: String,
}
565
/// Body of an exported message.
#[derive(Debug, Deserialize)]
struct ChatGPTExportContent {
    /// Content parts as raw JSON values. When present, only the string
    /// entries are used and they are joined with newlines.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain-text body, used only when `parts` is absent.
    #[serde(default)]
    text: Option<String>,
}
573
574fn timestamp_to_datetime(ts: f64) -> DateTime<Utc> {
575 use chrono::TimeZone;
576 Utc.timestamp_opt(ts as i64, ((ts.fract()) * 1_000_000_000.0) as u32)
577 .single()
578 .unwrap_or_else(Utc::now)
579}
580
#[cfg(test)]
mod tests {
    use super::*;

    /// A provider built with an API key reports its name and counts as authenticated.
    #[test]
    fn test_chatgpt_provider_new() {
        let with_key = ChatGPTProvider::new(Some(String::from("test-key")));
        assert_eq!(with_key.name(), "ChatGPT");
        assert!(with_key.is_authenticated());
    }

    /// A provider built without any credential is not authenticated.
    #[test]
    fn test_chatgpt_provider_unauthenticated() {
        let without_key = ChatGPTProvider::new(None);
        assert!(!without_key.is_authenticated());
    }

    /// The whole-second part of a fractional epoch timestamp is preserved.
    #[test]
    fn test_timestamp_to_datetime() {
        let converted = timestamp_to_datetime(1700000000.123);
        assert_eq!(converted.timestamp(), 1700000000);
    }
}