1use super::common::{
17 build_http_client, CloudConversation, CloudMessage, CloudProvider, FetchOptions,
18 HttpClientConfig,
19};
20use anyhow::{anyhow, Result};
21use chrono::{DateTime, Utc};
22use serde::{Deserialize, Deserializer};
23
/// Base URL of the ChatGPT backend API; the conversation endpoints used in this file are
/// built by appending a path to it.
const CHATGPT_API_BASE: &str = "https://chatgpt.com/backend-api";
25
26fn deserialize_timestamp<'de, D>(deserializer: D) -> std::result::Result<f64, D::Error>
28where
29 D: Deserializer<'de>,
30{
31 use serde::de::Error;
32
33 #[derive(Deserialize)]
34 #[serde(untagged)]
35 enum TimestampFormat {
36 Float(f64),
37 String(String),
38 }
39
40 match TimestampFormat::deserialize(deserializer)? {
41 TimestampFormat::Float(f) => Ok(f),
42 TimestampFormat::String(s) => {
43 if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
45 Ok(dt.timestamp() as f64)
46 } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
47 Ok(dt.timestamp() as f64)
48 } else {
49 Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
50 }
51 }
52 }
53}
54
55fn deserialize_optional_timestamp<'de, D>(
57 deserializer: D,
58) -> std::result::Result<Option<f64>, D::Error>
59where
60 D: Deserializer<'de>,
61{
62 use serde::de::Error;
63
64 #[derive(Deserialize)]
65 #[serde(untagged)]
66 enum TimestampFormat {
67 Float(f64),
68 String(String),
69 Null,
70 }
71
72 match Option::<TimestampFormat>::deserialize(deserializer)? {
73 None => Ok(None),
74 Some(TimestampFormat::Null) => Ok(None),
75 Some(TimestampFormat::Float(f)) => Ok(Some(f)),
76 Some(TimestampFormat::String(s)) => {
77 if s.is_empty() {
78 return Ok(None);
79 }
80 if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
82 Ok(Some(dt.timestamp() as f64))
83 } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
84 Ok(Some(dt.timestamp() as f64))
85 } else {
86 Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
87 }
88 }
89 }
90}
91
/// ChatGPT cloud provider: holds the available credentials and a lazily created HTTP client.
pub struct ChatGPTProvider {
    /// API key; used as the bearer token only when neither an access token nor a session
    /// token is available.
    api_key: Option<String>,
    /// Value sent as the `__Secure-next-auth.session-token` cookie to obtain an access token.
    session_token: Option<String>,
    /// Access token returned by the session endpoint, cached after the first successful fetch.
    access_token: Option<String>,
    /// HTTP client, built on first use by `ensure_client`.
    client: Option<reqwest::blocking::Client>,
}
99
100impl ChatGPTProvider {
101 pub fn new(api_key: Option<String>) -> Self {
102 Self {
103 api_key,
104 session_token: None,
105 access_token: None,
106 client: None,
107 }
108 }
109
110 pub fn with_session_token(session_token: String) -> Self {
112 Self {
113 api_key: None,
114 session_token: Some(session_token),
115 access_token: None,
116 client: None,
117 }
118 }
119
120 fn ensure_client(&mut self) -> Result<&reqwest::blocking::Client> {
121 if self.client.is_none() {
122 let mut config = HttpClientConfig::default();
123 config.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string();
124 self.client = Some(build_http_client(&config)?);
125 }
126 Ok(self.client.as_ref().unwrap())
127 }
128
129 fn get_access_token(&mut self) -> Result<String> {
131 if let Some(ref token) = self.access_token {
132 return Ok(token.clone());
133 }
134
135 let session_token = self
136 .session_token
137 .clone()
138 .ok_or_else(|| anyhow!("No session token available"))?;
139
140 let client = self.ensure_client()?;
141
142 let response = client
144 .get("https://chatgpt.com/api/auth/session")
145 .header(
146 "Cookie",
147 format!("__Secure-next-auth.session-token={}", session_token),
148 )
149 .header("Accept", "application/json")
150 .send()
151 .map_err(|e| anyhow!("Failed to get access token: {}", e))?;
152
153 if !response.status().is_success() {
154 let status = response.status();
155 let body = response.text().unwrap_or_default();
156 return Err(anyhow!(
157 "Session endpoint returned {}: {}. Authentication may have expired.",
158 status,
159 body
160 ));
161 }
162
163 let session_data: serde_json::Value = response
164 .json()
165 .map_err(|e| anyhow!("Failed to parse session response: {}", e))?;
166
167 let access_token = session_data
168 .get("accessToken")
169 .and_then(|v| v.as_str())
170 .ok_or_else(|| {
171 anyhow!("No access token in session response - authentication may have expired")
172 })?
173 .to_string();
174
175 self.access_token = Some(access_token.clone());
176 Ok(access_token)
177 }
178
179 fn get_auth_header(&mut self) -> Result<String> {
181 if let Some(ref token) = self.access_token {
182 return Ok(format!("Bearer {}", token));
183 }
184 if self.session_token.is_some() {
185 let token = self.get_access_token()?;
186 return Ok(format!("Bearer {}", token));
187 }
188 if let Some(ref key) = self.api_key {
189 return Ok(format!("Bearer {}", key));
190 }
191 Err(anyhow!("No authentication credentials available"))
192 }
193}
194
/// Response body parsed by `list_conversations` from the `/conversations` endpoint.
#[derive(Debug, Deserialize)]
struct ConversationListResponse {
    /// Conversation summaries contained in the response.
    items: Vec<ConversationItem>,
    /// Defaults to 0 when absent. Not read anywhere in this file.
    #[serde(default)]
    limit: i32,
    /// Defaults to 0 when absent. Not read anywhere in this file.
    #[serde(default)]
    offset: i32,
    /// Defaults to 0 when absent. Not read anywhere in this file.
    #[serde(default)]
    total: i32,
    /// Defaults to `false` when absent. Not read anywhere in this file.
    #[serde(default)]
    has_missing_conversations: bool,
}
207
/// One conversation summary inside a `ConversationListResponse`.
#[derive(Debug, Deserialize)]
struct ConversationItem {
    /// Conversation identifier, copied into `CloudConversation::id`.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time in epoch seconds; accepted as a number or a date string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last update time in epoch seconds; `None` when absent, null, or an empty string.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Archived flag (defaults to `false`); archived items are skipped unless the fetch
    /// options ask for them.
    #[serde(default)]
    is_archived: bool,
}
219
/// Response body parsed by `fetch_conversation` for a single conversation.
#[derive(Debug, Deserialize)]
struct ConversationDetailResponse {
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time in epoch seconds; accepted as a number or a date string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last update time in epoch seconds; `None` when absent, null, or an empty string.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Message nodes keyed by node id.
    mapping: std::collections::HashMap<String, MessageNode>,
    /// Not read anywhere in this file.
    #[serde(default)]
    current_node: Option<String>,
    /// Not read anywhere in this file; the id passed by the caller is used instead.
    #[serde(default)]
    conversation_id: Option<String>,
    /// Model information; its `slug` is reported as the model of the conversation and of
    /// each message.
    #[serde(default)]
    model: Option<ModelInfo>,
}
235
/// One entry of `ConversationDetailResponse::mapping`.
#[derive(Debug, Deserialize)]
struct MessageNode {
    /// Node identifier. Not read anywhere in this file (the map key is used instead).
    id: String,
    /// Parent node id, if any. Not read anywhere in this file.
    #[serde(default)]
    parent: Option<String>,
    /// Child node ids (empty when absent). Not read anywhere in this file.
    #[serde(default)]
    children: Vec<String>,
    /// Message carried by this node; nodes without a message are skipped.
    message: Option<MessageContent>,
}
245
/// A message attached to a `MessageNode`.
#[derive(Debug, Deserialize)]
struct MessageContent {
    /// Message identifier, copied into `CloudMessage::id`.
    id: String,
    /// Author of the message; its `role` decides whether the message is kept.
    author: AuthorInfo,
    /// Creation time in epoch seconds; `None` when absent, null, or an empty string.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    create_time: Option<f64>,
    /// Message body.
    content: ContentParts,
    /// Arbitrary metadata. Not read anywhere in this file.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
256
/// Author of a message.
#[derive(Debug, Deserialize)]
struct AuthorInfo {
    /// Author role; messages whose role is `"system"` or `"tool"` are dropped by
    /// `fetch_conversation`.
    role: String,
    /// Author name, if any. Not read anywhere in this file.
    #[serde(default)]
    name: Option<String>,
    /// Arbitrary metadata. Not read anywhere in this file.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
265
/// Body of a message.
#[derive(Debug, Deserialize)]
struct ContentParts {
    /// Required in the payload but not read anywhere in this file.
    content_type: String,
    /// Content parts; string entries are joined with newlines, non-string entries are ignored.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain text body, used only when `parts` is absent.
    #[serde(default)]
    text: Option<String>,
}
274
/// Model information attached to a conversation detail response.
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model identifier, reported as the model of the conversation and of its messages.
    slug: Option<String>,
    /// Not read anywhere in this file.
    max_tokens: Option<i32>,
    /// Not read anywhere in this file.
    title: Option<String>,
}
281
282impl CloudProvider for ChatGPTProvider {
283 fn name(&self) -> &'static str {
284 "ChatGPT"
285 }
286
287 fn api_base_url(&self) -> &str {
288 CHATGPT_API_BASE
289 }
290
291 fn is_authenticated(&self) -> bool {
292 self.api_key.is_some() || self.session_token.is_some() || self.access_token.is_some()
293 }
294
295 fn set_credentials(&mut self, api_key: Option<String>, session_token: Option<String>) {
296 self.api_key = api_key;
297 self.session_token = session_token;
298 self.access_token = None; }
300
301 fn list_conversations(&self, options: &FetchOptions) -> Result<Vec<CloudConversation>> {
302 let mut provider = ChatGPTProvider {
305 api_key: self.api_key.clone(),
306 session_token: self.session_token.clone(),
307 access_token: self.access_token.clone(),
308 client: None,
309 };
310
311 if !provider.is_authenticated() {
312 return Err(anyhow!(
313 "ChatGPT requires authentication. Provide a session token from browser cookies.\n\
314 Run 'chasm harvest scan --web' to check browser authentication status."
315 ));
316 }
317
318 let auth_header = provider.get_auth_header()?;
320 let client = provider.ensure_client()?;
321
322 let limit = options.limit.unwrap_or(50).min(100);
323 let url = format!(
324 "{}/conversations?offset=0&limit={}&order=updated",
325 CHATGPT_API_BASE, limit
326 );
327
328 let response = client
329 .get(&url)
330 .header("Authorization", &auth_header)
331 .header("Accept", "application/json")
332 .header("Content-Type", "application/json")
333 .send()
334 .map_err(|e| anyhow!("Failed to fetch conversations: {}", e))?;
335
336 if !response.status().is_success() {
337 let status = response.status();
338 let body = response.text().unwrap_or_default();
339 return Err(anyhow!(
340 "ChatGPT API returned {}: {}. Session may have expired - log in to chatgpt.com in your browser.",
341 status,
342 body
343 ));
344 }
345
346 let list_response: ConversationListResponse = response
347 .json()
348 .map_err(|e| anyhow!("Failed to parse conversation list: {}", e))?;
349
350 let mut conversations = Vec::new();
353 for item in list_response.items {
354 if item.is_archived && !options.include_archived {
356 continue;
357 }
358
359 let created = timestamp_to_datetime(item.create_time);
361 if let Some(after) = options.after {
362 if created < after {
363 continue;
364 }
365 }
366 if let Some(before) = options.before {
367 if created > before {
368 continue;
369 }
370 }
371
372 conversations.push(CloudConversation {
373 id: item.id,
374 title: item.title,
375 created_at: created,
376 updated_at: item.update_time.map(timestamp_to_datetime),
377 model: None,
378 messages: Vec::new(), metadata: None,
380 });
381 }
382
383 Ok(conversations)
384 }
385
386 fn fetch_conversation(&self, id: &str) -> Result<CloudConversation> {
387 let mut provider = ChatGPTProvider {
388 api_key: self.api_key.clone(),
389 session_token: self.session_token.clone(),
390 access_token: self.access_token.clone(),
391 client: None,
392 };
393
394 if !provider.is_authenticated() {
395 return Err(anyhow!("ChatGPT requires authentication"));
396 }
397
398 let auth_header = provider.get_auth_header()?;
399 let client = provider.ensure_client()?;
400
401 let url = format!("{}/conversation/{}", CHATGPT_API_BASE, id);
402
403 let response = client
404 .get(&url)
405 .header("Authorization", &auth_header)
406 .header("Accept", "application/json")
407 .send()
408 .map_err(|e| anyhow!("Failed to fetch conversation {}: {}", id, e))?;
409
410 if !response.status().is_success() {
411 let status = response.status();
412 return Err(anyhow!(
413 "Failed to fetch conversation {}: HTTP {}",
414 id,
415 status
416 ));
417 }
418
419 let detail: ConversationDetailResponse = response
420 .json()
421 .map_err(|e| anyhow!("Failed to parse conversation {}: {}", id, e))?;
422
423 let mut message_order: Vec<(String, CloudMessage)> = Vec::new();
426
427 for (node_id, node) in &detail.mapping {
428 if let Some(ref msg_content) = node.message {
429 let role = &msg_content.author.role;
430
431 if role == "system" || role == "tool" {
433 continue;
434 }
435
436 let content = msg_content
437 .content
438 .parts
439 .as_ref()
440 .map(|parts| {
441 parts
442 .iter()
443 .filter_map(|p| p.as_str().map(String::from))
444 .collect::<Vec<_>>()
445 .join("\n")
446 })
447 .or_else(|| msg_content.content.text.clone())
448 .unwrap_or_default();
449
450 if content.is_empty() {
451 continue;
452 }
453
454 let cloud_message = CloudMessage {
455 id: Some(msg_content.id.clone()),
456 role: role.clone(),
457 content,
458 timestamp: msg_content.create_time.map(timestamp_to_datetime),
459 model: detail.model.as_ref().and_then(|m| m.slug.clone()),
460 };
461
462 message_order.push((node_id.clone(), cloud_message));
463 }
464 }
465
466 message_order.sort_by(|a, b| {
468 let ts_a = a.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
469 let ts_b = b.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
470 ts_a.cmp(&ts_b)
471 });
472
473 let messages: Vec<CloudMessage> = message_order.into_iter().map(|(_, msg)| msg).collect();
474
475 Ok(CloudConversation {
476 id: id.to_string(),
477 title: detail.title,
478 created_at: timestamp_to_datetime(detail.create_time),
479 updated_at: detail.update_time.map(timestamp_to_datetime),
480 model: detail.model.and_then(|m| m.slug),
481 messages,
482 metadata: None,
483 })
484 }
485
486 fn api_key_env_var(&self) -> &'static str {
487 "OPENAI_API_KEY"
488 }
489}
490
491pub fn parse_chatgpt_export(json_data: &str) -> Result<Vec<CloudConversation>> {
493 let conversations: Vec<ChatGPTExportConversation> = serde_json::from_str(json_data)?;
494
495 Ok(conversations
496 .into_iter()
497 .map(|conv| CloudConversation {
498 id: conv.id,
499 title: conv.title,
500 created_at: timestamp_to_datetime(conv.create_time),
501 updated_at: conv.update_time.map(timestamp_to_datetime),
502 model: None,
503 messages: conv
504 .mapping
505 .into_iter()
506 .filter_map(|(_, node)| {
507 node.message.map(|msg| {
508 let content = msg
509 .content
510 .parts
511 .map(|parts| {
512 parts
513 .into_iter()
514 .filter_map(|p| p.as_str().map(String::from))
515 .collect::<Vec<_>>()
516 .join("\n")
517 })
518 .or(msg.content.text)
519 .unwrap_or_default();
520
521 CloudMessage {
522 id: Some(msg.id),
523 role: msg.author.role,
524 content,
525 timestamp: msg.create_time.map(timestamp_to_datetime),
526 model: None,
527 }
528 })
529 })
530 .filter(|m| !m.content.is_empty() && m.role != "system")
531 .collect(),
532 metadata: None,
533 })
534 .collect())
535}
536
/// One conversation entry of the export document parsed by `parse_chatgpt_export`.
#[derive(Debug, Deserialize)]
struct ChatGPTExportConversation {
    /// Conversation identifier.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time in epoch seconds; must be a JSON number.
    create_time: f64,
    /// Last update time in epoch seconds, if any; must be a JSON number or null.
    update_time: Option<f64>,
    /// Message nodes keyed by node id.
    mapping: std::collections::HashMap<String, ChatGPTExportNode>,
}
545
/// One entry of `ChatGPTExportConversation::mapping`.
#[derive(Debug, Deserialize)]
struct ChatGPTExportNode {
    /// Message carried by this node; nodes without a message are skipped.
    message: Option<ChatGPTExportMessage>,
}
550
/// A message inside an export node.
#[derive(Debug, Deserialize)]
struct ChatGPTExportMessage {
    /// Message identifier, copied into `CloudMessage::id`.
    id: String,
    /// Author of the message.
    author: ChatGPTExportAuthor,
    /// Creation time in epoch seconds, if any; must be a JSON number or null.
    create_time: Option<f64>,
    /// Message body.
    content: ChatGPTExportContent,
}
558
/// Author of an exported message.
#[derive(Debug, Deserialize)]
struct ChatGPTExportAuthor {
    /// Author role; messages whose role is `"system"` are dropped by `parse_chatgpt_export`.
    role: String,
}
563
/// Body of an exported message.
#[derive(Debug, Deserialize)]
struct ChatGPTExportContent {
    /// Content parts; string entries are joined with newlines, non-string entries are ignored.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain text body, used only when `parts` is absent.
    #[serde(default)]
    text: Option<String>,
}
571
572fn timestamp_to_datetime(ts: f64) -> DateTime<Utc> {
573 use chrono::TimeZone;
574 Utc.timestamp_opt(ts as i64, ((ts.fract()) * 1_000_000_000.0) as u32)
575 .single()
576 .unwrap_or_else(Utc::now)
577}
578
#[cfg(test)]
mod tests {
    use super::*;

    /// A provider built with an API key reports its name and counts as authenticated.
    #[test]
    fn test_chatgpt_provider_new() {
        let api_key = Some("test-key".to_string());
        let with_key = ChatGPTProvider::new(api_key);

        assert_eq!(with_key.name(), "ChatGPT");
        assert!(with_key.is_authenticated());
    }

    /// A provider built without any credential is not authenticated.
    #[test]
    fn test_chatgpt_provider_unauthenticated() {
        let without_key = ChatGPTProvider::new(None);

        assert!(!without_key.is_authenticated());
    }

    /// The whole-second part of a fractional timestamp survives the conversion.
    #[test]
    fn test_timestamp_to_datetime() {
        let converted = timestamp_to_datetime(1700000000.123);

        assert_eq!(converted.timestamp(), 1700000000);
    }
}