chasm_cli/providers/cloud/
chatgpt.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! ChatGPT (OpenAI) cloud provider
4//!
//! Fetches conversation history from the ChatGPT web interface.
6//!
7//! ## Authentication
8//!
9//! Requires either:
10//! - API key via `OPENAI_API_KEY` environment variable (for API access)
11//! - Session token for web interface access (retrieved from browser cookies)
12//!
13//! Note: The official API doesn't provide conversation history access.
14//! Web scraping requires a session token from browser cookies.
15
16use super::common::{
17    build_http_client, CloudConversation, CloudMessage, CloudProvider, FetchOptions,
18    HttpClientConfig,
19};
20use anyhow::{anyhow, Result};
21use chrono::{DateTime, Utc};
22use serde::{Deserialize, Deserializer};
23
/// Base URL of the ChatGPT web backend; the conversation endpoints used in this
/// file are built by appending a path to it.
const CHATGPT_API_BASE: &str = "https://chatgpt.com/backend-api";
25
26/// Custom deserializer that handles both Unix timestamp (f64) and ISO8601 string
27fn deserialize_timestamp<'de, D>(deserializer: D) -> std::result::Result<f64, D::Error>
28where
29    D: Deserializer<'de>,
30{
31    use serde::de::Error;
32
33    #[derive(Deserialize)]
34    #[serde(untagged)]
35    enum TimestampFormat {
36        Float(f64),
37        String(String),
38    }
39
40    match TimestampFormat::deserialize(deserializer)? {
41        TimestampFormat::Float(f) => Ok(f),
42        TimestampFormat::String(s) => {
43            // Try to parse as ISO8601
44            if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
45                Ok(dt.timestamp() as f64)
46            } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
47                Ok(dt.timestamp() as f64)
48            } else {
49                Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
50            }
51        }
52    }
53}
54
55/// Custom deserializer that handles optional timestamps in both formats
56fn deserialize_optional_timestamp<'de, D>(
57    deserializer: D,
58) -> std::result::Result<Option<f64>, D::Error>
59where
60    D: Deserializer<'de>,
61{
62    use serde::de::Error;
63
64    #[derive(Deserialize)]
65    #[serde(untagged)]
66    enum TimestampFormat {
67        Float(f64),
68        String(String),
69        Null,
70    }
71
72    match Option::<TimestampFormat>::deserialize(deserializer)? {
73        None => Ok(None),
74        Some(TimestampFormat::Null) => Ok(None),
75        Some(TimestampFormat::Float(f)) => Ok(Some(f)),
76        Some(TimestampFormat::String(s)) => {
77            if s.is_empty() {
78                return Ok(None);
79            }
80            // Try to parse as ISO8601
81            if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
82                Ok(Some(dt.timestamp() as f64))
83            } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
84                Ok(Some(dt.timestamp() as f64))
85            } else {
86                Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
87            }
88        }
89    }
90}
91
/// ChatGPT provider for fetching conversation history.
///
/// Holds the credentials supplied by the caller plus two lazily filled
/// caches (the exchanged access token and the HTTP client).
pub struct ChatGPTProvider {
    /// API key supplied by the caller; sent as a bearer token only when no
    /// access or session token is available.
    api_key: Option<String>,
    /// Browser session cookie value, exchanged for an access token on demand.
    session_token: Option<String>,
    /// Access token obtained from the session endpoint, cached after the first exchange.
    access_token: Option<String>,
    /// HTTP client, built on first use.
    client: Option<reqwest::blocking::Client>,
}
99
100impl ChatGPTProvider {
101    pub fn new(api_key: Option<String>) -> Self {
102        Self {
103            api_key,
104            session_token: None,
105            access_token: None,
106            client: None,
107        }
108    }
109
110    /// Create provider with session token from browser cookies
111    pub fn with_session_token(session_token: String) -> Self {
112        Self {
113            api_key: None,
114            session_token: Some(session_token),
115            access_token: None,
116            client: None,
117        }
118    }
119
120    fn ensure_client(&mut self) -> Result<&reqwest::blocking::Client> {
121        if self.client.is_none() {
122            let mut config = HttpClientConfig::default();
123            config.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string();
124            self.client = Some(build_http_client(&config)?);
125        }
126        Ok(self.client.as_ref().unwrap())
127    }
128
129    /// Exchange session token for access token
130    fn get_access_token(&mut self) -> Result<String> {
131        if let Some(ref token) = self.access_token {
132            return Ok(token.clone());
133        }
134
135        let session_token = self
136            .session_token
137            .clone()
138            .ok_or_else(|| anyhow!("No session token available"))?;
139
140        let client = self.ensure_client()?;
141
142        // Call the session endpoint to get access token
143        let response = client
144            .get("https://chatgpt.com/api/auth/session")
145            .header(
146                "Cookie",
147                format!("__Secure-next-auth.session-token={}", session_token),
148            )
149            .header("Accept", "application/json")
150            .send()
151            .map_err(|e| anyhow!("Failed to get access token: {}", e))?;
152
153        if !response.status().is_success() {
154            let status = response.status();
155            let body = response.text().unwrap_or_default();
156            return Err(anyhow!(
157                "Session endpoint returned {}: {}. Authentication may have expired.",
158                status,
159                body
160            ));
161        }
162
163        let session_data: serde_json::Value = response
164            .json()
165            .map_err(|e| anyhow!("Failed to parse session response: {}", e))?;
166
167        let access_token = session_data
168            .get("accessToken")
169            .and_then(|v| v.as_str())
170            .ok_or_else(|| {
171                anyhow!("No access token in session response - authentication may have expired")
172            })?
173            .to_string();
174
175        self.access_token = Some(access_token.clone());
176        Ok(access_token)
177    }
178
179    /// Build authorization header
180    fn get_auth_header(&mut self) -> Result<String> {
181        if let Some(ref token) = self.access_token {
182            return Ok(format!("Bearer {}", token));
183        }
184        if self.session_token.is_some() {
185            let token = self.get_access_token()?;
186            return Ok(format!("Bearer {}", token));
187        }
188        if let Some(ref key) = self.api_key {
189            return Ok(format!("Bearer {}", key));
190        }
191        Err(anyhow!("No authentication credentials available"))
192    }
193}
194
/// Deserialized body of the conversation-list request.
///
/// `items` is required; every other field falls back to its type's default
/// value when absent from the payload.
#[derive(Debug, Deserialize)]
struct ConversationListResponse {
    /// Conversation summaries contained in this response.
    items: Vec<ConversationItem>,
    /// NOTE(review): presumably the page size echoed by the server — confirm.
    #[serde(default)]
    limit: i32,
    /// NOTE(review): presumably the page offset echoed by the server — confirm.
    #[serde(default)]
    offset: i32,
    /// NOTE(review): presumably the total number of conversations — confirm.
    #[serde(default)]
    total: i32,
    /// Server-provided flag; its exact meaning is not established by this file.
    #[serde(default)]
    has_missing_conversations: bool,
}
207
/// One conversation summary inside a [`ConversationListResponse`].
#[derive(Debug, Deserialize)]
struct ConversationItem {
    /// Conversation identifier.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time in Unix seconds; accepts a number or a date-time string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time in Unix seconds; `None` when missing, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Whether the conversation is archived; `false` when the field is absent.
    #[serde(default)]
    is_archived: bool,
}
219
/// Deserialized body of a single-conversation request.
#[derive(Debug, Deserialize)]
struct ConversationDetailResponse {
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time in Unix seconds; accepts a number or a date-time string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time in Unix seconds; `None` when missing, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Message nodes keyed by a string id (presumably the node id — confirm).
    mapping: std::collections::HashMap<String, MessageNode>,
    /// NOTE(review): presumably the id of the node at the end of the active
    /// thread — confirm against the API.
    #[serde(default)]
    current_node: Option<String>,
    /// Conversation identifier as reported in the payload, if present.
    #[serde(default)]
    conversation_id: Option<String>,
    /// Model information, if the payload carries any.
    #[serde(default)]
    model: Option<ModelInfo>,
}
235
/// One entry of the `mapping` table in a [`ConversationDetailResponse`].
#[derive(Debug, Deserialize)]
struct MessageNode {
    /// Node identifier.
    id: String,
    /// Identifier of the parent node; `None` when absent.
    #[serde(default)]
    parent: Option<String>,
    /// Identifiers of child nodes; empty when absent.
    #[serde(default)]
    children: Vec<String>,
    /// Message carried by this node; a node may have none.
    message: Option<MessageContent>,
}
245
/// A message attached to a [`MessageNode`].
#[derive(Debug, Deserialize)]
struct MessageContent {
    /// Message identifier.
    id: String,
    /// Who produced the message.
    author: AuthorInfo,
    /// Creation time in Unix seconds; `None` when missing, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    create_time: Option<f64>,
    /// Message body.
    content: ContentParts,
    /// Free-form metadata kept as raw JSON; not interpreted by this file.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
256
/// Author of a message.
#[derive(Debug, Deserialize)]
struct AuthorInfo {
    /// Role string; this file compares it against `"system"` and `"tool"`.
    role: String,
    /// Optional author name.
    #[serde(default)]
    name: Option<String>,
    /// Free-form metadata kept as raw JSON; not interpreted by this file.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
265
/// Body of a message: either a list of parts or a single text field.
#[derive(Debug, Deserialize)]
struct ContentParts {
    /// Content type tag reported by the server (required in the payload).
    content_type: String,
    /// Content parts as raw JSON values; only string parts are used when
    /// building message text.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain-text body, used when `parts` is absent.
    #[serde(default)]
    text: Option<String>,
}
274
/// Model information attached to a conversation; every field is optional.
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model slug; copied into the `model` fields of the returned conversation data.
    slug: Option<String>,
    /// NOTE(review): presumably the model's token limit — confirm.
    max_tokens: Option<i32>,
    /// NOTE(review): presumably a human-readable model name — confirm.
    title: Option<String>,
}
281
282impl CloudProvider for ChatGPTProvider {
283    fn name(&self) -> &'static str {
284        "ChatGPT"
285    }
286
287    fn api_base_url(&self) -> &str {
288        CHATGPT_API_BASE
289    }
290
291    fn is_authenticated(&self) -> bool {
292        self.api_key.is_some() || self.session_token.is_some() || self.access_token.is_some()
293    }
294
295    fn set_credentials(&mut self, api_key: Option<String>, session_token: Option<String>) {
296        self.api_key = api_key;
297        self.session_token = session_token;
298        self.access_token = None; // Clear cached access token when credentials change
299    }
300
301    fn list_conversations(&self, options: &FetchOptions) -> Result<Vec<CloudConversation>> {
302        // We need mutable self to get access token, so use interior mutability pattern
303        // For now, create a new instance - this is a workaround for the trait signature
304        let mut provider = ChatGPTProvider {
305            api_key: self.api_key.clone(),
306            session_token: self.session_token.clone(),
307            access_token: self.access_token.clone(),
308            client: None,
309        };
310
311        if !provider.is_authenticated() {
312            return Err(anyhow!(
313                "ChatGPT requires authentication. Provide a session token from browser cookies.\n\
314                Run 'chasm harvest scan --web' to check browser authentication status."
315            ));
316        }
317
318        // Try to get access token and list conversations
319        let auth_header = provider.get_auth_header()?;
320        let client = provider.ensure_client()?;
321
322        let limit = options.limit.unwrap_or(50).min(100);
323        let url = format!(
324            "{}/conversations?offset=0&limit={}&order=updated",
325            CHATGPT_API_BASE, limit
326        );
327
328        let response = client
329            .get(&url)
330            .header("Authorization", &auth_header)
331            .header("Accept", "application/json")
332            .header("Content-Type", "application/json")
333            .send()
334            .map_err(|e| anyhow!("Failed to fetch conversations: {}", e))?;
335
336        if !response.status().is_success() {
337            let status = response.status();
338            let body = response.text().unwrap_or_default();
339            return Err(anyhow!(
340                "ChatGPT API returned {}: {}. Session may have expired - log in to chatgpt.com in your browser.",
341                status,
342                body
343            ));
344        }
345
346        let list_response: ConversationListResponse = response
347            .json()
348            .map_err(|e| anyhow!("Failed to parse conversation list: {}", e))?;
349
350        // Debug: Found {} conversations (total: {})
351
352        let mut conversations = Vec::new();
353        for item in list_response.items {
354            // Skip archived if not requested
355            if item.is_archived && !options.include_archived {
356                continue;
357            }
358
359            // Apply date filters
360            let created = timestamp_to_datetime(item.create_time);
361            if let Some(after) = options.after {
362                if created < after {
363                    continue;
364                }
365            }
366            if let Some(before) = options.before {
367                if created > before {
368                    continue;
369                }
370            }
371
372            conversations.push(CloudConversation {
373                id: item.id,
374                title: item.title,
375                created_at: created,
376                updated_at: item.update_time.map(timestamp_to_datetime),
377                model: None,
378                messages: Vec::new(), // Will be populated by fetch_conversation
379                metadata: None,
380            });
381        }
382
383        Ok(conversations)
384    }
385
386    fn fetch_conversation(&self, id: &str) -> Result<CloudConversation> {
387        let mut provider = ChatGPTProvider {
388            api_key: self.api_key.clone(),
389            session_token: self.session_token.clone(),
390            access_token: self.access_token.clone(),
391            client: None,
392        };
393
394        if !provider.is_authenticated() {
395            return Err(anyhow!("ChatGPT requires authentication"));
396        }
397
398        let auth_header = provider.get_auth_header()?;
399        let client = provider.ensure_client()?;
400
401        let url = format!("{}/conversation/{}", CHATGPT_API_BASE, id);
402
403        let response = client
404            .get(&url)
405            .header("Authorization", &auth_header)
406            .header("Accept", "application/json")
407            .send()
408            .map_err(|e| anyhow!("Failed to fetch conversation {}: {}", id, e))?;
409
410        if !response.status().is_success() {
411            let status = response.status();
412            return Err(anyhow!(
413                "Failed to fetch conversation {}: HTTP {}",
414                id,
415                status
416            ));
417        }
418
419        let detail: ConversationDetailResponse = response
420            .json()
421            .map_err(|e| anyhow!("Failed to parse conversation {}: {}", id, e))?;
422
423        // Extract messages from the mapping tree
424        // Build a map of node IDs to their messages
425        let mut message_order: Vec<(String, CloudMessage)> = Vec::new();
426
427        for (node_id, node) in &detail.mapping {
428            if let Some(ref msg_content) = node.message {
429                let role = &msg_content.author.role;
430
431                // Skip system messages and tool messages
432                if role == "system" || role == "tool" {
433                    continue;
434                }
435
436                let content = msg_content
437                    .content
438                    .parts
439                    .as_ref()
440                    .map(|parts| {
441                        parts
442                            .iter()
443                            .filter_map(|p| p.as_str().map(String::from))
444                            .collect::<Vec<_>>()
445                            .join("\n")
446                    })
447                    .or_else(|| msg_content.content.text.clone())
448                    .unwrap_or_default();
449
450                if content.is_empty() {
451                    continue;
452                }
453
454                let cloud_message = CloudMessage {
455                    id: Some(msg_content.id.clone()),
456                    role: role.clone(),
457                    content,
458                    timestamp: msg_content.create_time.map(timestamp_to_datetime),
459                    model: detail.model.as_ref().and_then(|m| m.slug.clone()),
460                };
461
462                message_order.push((node_id.clone(), cloud_message));
463            }
464        }
465
466        // Sort messages by timestamp if available
467        message_order.sort_by(|a, b| {
468            let ts_a = a.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
469            let ts_b = b.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
470            ts_a.cmp(&ts_b)
471        });
472
473        let messages: Vec<CloudMessage> = message_order.into_iter().map(|(_, msg)| msg).collect();
474
475        Ok(CloudConversation {
476            id: id.to_string(),
477            title: detail.title,
478            created_at: timestamp_to_datetime(detail.create_time),
479            updated_at: detail.update_time.map(timestamp_to_datetime),
480            model: detail.model.and_then(|m| m.slug),
481            messages,
482            metadata: None,
483        })
484    }
485
486    fn api_key_env_var(&self) -> &'static str {
487        "OPENAI_API_KEY"
488    }
489}
490
491/// Parse a ChatGPT export file (JSON format from "Export data" feature)
492pub fn parse_chatgpt_export(json_data: &str) -> Result<Vec<CloudConversation>> {
493    let conversations: Vec<ChatGPTExportConversation> = serde_json::from_str(json_data)?;
494
495    Ok(conversations
496        .into_iter()
497        .map(|conv| CloudConversation {
498            id: conv.id,
499            title: conv.title,
500            created_at: timestamp_to_datetime(conv.create_time),
501            updated_at: conv.update_time.map(timestamp_to_datetime),
502            model: None,
503            messages: conv
504                .mapping
505                .into_iter()
506                .filter_map(|(_, node)| {
507                    node.message.map(|msg| {
508                        let content = msg
509                            .content
510                            .parts
511                            .map(|parts| {
512                                parts
513                                    .into_iter()
514                                    .filter_map(|p| p.as_str().map(String::from))
515                                    .collect::<Vec<_>>()
516                                    .join("\n")
517                            })
518                            .or(msg.content.text)
519                            .unwrap_or_default();
520
521                        CloudMessage {
522                            id: Some(msg.id),
523                            role: msg.author.role,
524                            content,
525                            timestamp: msg.create_time.map(timestamp_to_datetime),
526                            model: None,
527                        }
528                    })
529                })
530                .filter(|m| !m.content.is_empty() && m.role != "system")
531                .collect(),
532            metadata: None,
533        })
534        .collect())
535}
536
537#[derive(Debug, Deserialize)]
538struct ChatGPTExportConversation {
539    id: String,
540    title: Option<String>,
541    create_time: f64,
542    update_time: Option<f64>,
543    mapping: std::collections::HashMap<String, ChatGPTExportNode>,
544}
545
546#[derive(Debug, Deserialize)]
547struct ChatGPTExportNode {
548    message: Option<ChatGPTExportMessage>,
549}
550
551#[derive(Debug, Deserialize)]
552struct ChatGPTExportMessage {
553    id: String,
554    author: ChatGPTExportAuthor,
555    create_time: Option<f64>,
556    content: ChatGPTExportContent,
557}
558
559#[derive(Debug, Deserialize)]
560struct ChatGPTExportAuthor {
561    role: String,
562}
563
564#[derive(Debug, Deserialize)]
565struct ChatGPTExportContent {
566    #[serde(default)]
567    parts: Option<Vec<serde_json::Value>>,
568    #[serde(default)]
569    text: Option<String>,
570}
571
572fn timestamp_to_datetime(ts: f64) -> DateTime<Utc> {
573    use chrono::TimeZone;
574    Utc.timestamp_opt(ts as i64, ((ts.fract()) * 1_000_000_000.0) as u32)
575        .single()
576        .unwrap_or_else(Utc::now)
577}
578
#[cfg(test)]
mod tests {
    use super::*;

    /// A provider created with an API key reports its name and counts as authenticated.
    #[test]
    fn test_chatgpt_provider_new() {
        let key = String::from("test-key");
        let provider = ChatGPTProvider::new(Some(key));

        assert!(provider.is_authenticated());
        assert_eq!(provider.name(), "ChatGPT");
    }

    /// A provider created without any credential is not authenticated.
    #[test]
    fn test_chatgpt_provider_unauthenticated() {
        assert!(!ChatGPTProvider::new(None).is_authenticated());
    }

    /// The whole-second part of a fractional timestamp survives the conversion.
    #[test]
    fn test_timestamp_to_datetime() {
        let converted = timestamp_to_datetime(1700000000.123);
        assert_eq!(converted.timestamp(), 1700000000);
    }
}