chasm_cli/providers/cloud/
chatgpt.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! ChatGPT (OpenAI) cloud provider
4//!
5//! Fetches conversation history from ChatGPT web interface.
6//!
7//! ## Authentication
8//!
9//! Requires either:
10//! - API key via `OPENAI_API_KEY` environment variable (for API access)
11//! - Session token for web interface access (retrieved from browser cookies)
12//!
13//! Note: The official API doesn't provide conversation history access.
14//! Web scraping requires a session token from browser cookies.
15
16use super::common::{
17    build_http_client, CloudConversation, CloudMessage, CloudProvider, FetchOptions,
18    HttpClientConfig,
19};
20use anyhow::{anyhow, Result};
21use chrono::{DateTime, Utc};
22use serde::{Deserialize, Deserializer};
23
/// Base URL of the ChatGPT backend API; the conversation list and detail
/// endpoints used by this provider are built by appending to it.
const CHATGPT_API_BASE: &str = "https://chatgpt.com/backend-api";
25
26/// Custom deserializer that handles both Unix timestamp (f64) and ISO8601 string
27fn deserialize_timestamp<'de, D>(deserializer: D) -> std::result::Result<f64, D::Error>
28where
29    D: Deserializer<'de>,
30{
31    use serde::de::Error;
32
33    #[derive(Deserialize)]
34    #[serde(untagged)]
35    enum TimestampFormat {
36        Float(f64),
37        String(String),
38    }
39
40    match TimestampFormat::deserialize(deserializer)? {
41        TimestampFormat::Float(f) => Ok(f),
42        TimestampFormat::String(s) => {
43            // Try to parse as ISO8601
44            if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
45                Ok(dt.timestamp() as f64)
46            } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
47                Ok(dt.timestamp() as f64)
48            } else {
49                Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
50            }
51        }
52    }
53}
54
55/// Custom deserializer that handles optional timestamps in both formats
56fn deserialize_optional_timestamp<'de, D>(
57    deserializer: D,
58) -> std::result::Result<Option<f64>, D::Error>
59where
60    D: Deserializer<'de>,
61{
62    use serde::de::Error;
63
64    #[derive(Deserialize)]
65    #[serde(untagged)]
66    enum TimestampFormat {
67        Float(f64),
68        String(String),
69        Null,
70    }
71
72    match Option::<TimestampFormat>::deserialize(deserializer)? {
73        None => Ok(None),
74        Some(TimestampFormat::Null) => Ok(None),
75        Some(TimestampFormat::Float(f)) => Ok(Some(f)),
76        Some(TimestampFormat::String(s)) => {
77            if s.is_empty() {
78                return Ok(None);
79            }
80            // Try to parse as ISO8601
81            if let Ok(dt) = DateTime::parse_from_rfc3339(&s) {
82                Ok(Some(dt.timestamp() as f64))
83            } else if let Ok(dt) = s.parse::<DateTime<Utc>>() {
84                Ok(Some(dt.timestamp() as f64))
85            } else {
86                Err(D::Error::custom(format!("Invalid timestamp format: {}", s)))
87            }
88        }
89    }
90}
91
/// ChatGPT provider for fetching conversation history
///
/// Holds the caller-supplied credentials together with state that is filled
/// in lazily: the access token exchanged from the session token, and the
/// HTTP client.
pub struct ChatGPTProvider {
    /// API key; sent as the bearer token only when neither an access token
    /// nor a session token is available.
    api_key: Option<String>,
    /// Session token from browser cookies; exchanged for an access token on demand.
    session_token: Option<String>,
    /// Access token returned by the session endpoint, cached after the first exchange.
    access_token: Option<String>,
    /// HTTP client, built on first use.
    client: Option<reqwest::blocking::Client>,
}
99
100impl ChatGPTProvider {
101    pub fn new(api_key: Option<String>) -> Self {
102        Self {
103            api_key,
104            session_token: None,
105            access_token: None,
106            client: None,
107        }
108    }
109
110    /// Create provider with session token from browser cookies
111    pub fn with_session_token(session_token: String) -> Self {
112        Self {
113            api_key: None,
114            session_token: Some(session_token),
115            access_token: None,
116            client: None,
117        }
118    }
119
120    fn ensure_client(&mut self) -> Result<&reqwest::blocking::Client> {
121        if self.client.is_none() {
122            let config = HttpClientConfig {
123                user_agent: "".to_string(),
124                ..Default::default()
125            };
126            self.client = Some(build_http_client(&config)?);
127        }
128        Ok(self.client.as_ref().unwrap())
129    }
130
131    /// Exchange session token for access token
132    fn get_access_token(&mut self) -> Result<String> {
133        if let Some(ref token) = self.access_token {
134            return Ok(token.clone());
135        }
136
137        let session_token = self
138            .session_token
139            .clone()
140            .ok_or_else(|| anyhow!("No session token available"))?;
141
142        let client = self.ensure_client()?;
143
144        // Call the session endpoint to get access token
145        let response = client
146            .get("https://chatgpt.com/api/auth/session")
147            .header(
148                "Cookie",
149                format!("__Secure-next-auth.session-token={}", session_token),
150            )
151            .header("Accept", "application/json")
152            .send()
153            .map_err(|e| anyhow!("Failed to get access token: {}", e))?;
154
155        if !response.status().is_success() {
156            let status = response.status();
157            let body = response.text().unwrap_or_default();
158            return Err(anyhow!(
159                "Session endpoint returned {}: {}. Authentication may have expired.",
160                status,
161                body
162            ));
163        }
164
165        let session_data: serde_json::Value = response
166            .json()
167            .map_err(|e| anyhow!("Failed to parse session response: {}", e))?;
168
169        let access_token = session_data
170            .get("accessToken")
171            .and_then(|v| v.as_str())
172            .ok_or_else(|| {
173                anyhow!("No access token in session response - authentication may have expired")
174            })?
175            .to_string();
176
177        self.access_token = Some(access_token.clone());
178        Ok(access_token)
179    }
180
181    /// Build authorization header
182    fn get_auth_header(&mut self) -> Result<String> {
183        if let Some(ref token) = self.access_token {
184            return Ok(format!("Bearer {}", token));
185        }
186        if self.session_token.is_some() {
187            let token = self.get_access_token()?;
188            return Ok(format!("Bearer {}", token));
189        }
190        if let Some(ref key) = self.api_key {
191            return Ok(format!("Bearer {}", key));
192        }
193        Err(anyhow!("No authentication credentials available"))
194    }
195}
196
/// Body of the conversation-list response parsed by `list_conversations`.
///
/// Only `items` is read by the code in this file; the remaining fields fall
/// back to their type's default when the response omits them.
#[derive(Debug, Deserialize)]
struct ConversationListResponse {
    /// Conversation summaries contained in the response.
    items: Vec<ConversationItem>,
    /// Presumably the page size echoed by the server — not read here.
    #[serde(default)]
    limit: i32,
    /// Presumably the page offset echoed by the server — not read here.
    #[serde(default)]
    offset: i32,
    /// Presumably the total number of conversations — not read here.
    #[serde(default)]
    total: i32,
    /// Not read here; meaning is defined by the remote API.
    #[serde(default)]
    has_missing_conversations: bool,
}
209
/// One entry of the conversation list.
#[derive(Debug, Deserialize)]
struct ConversationItem {
    /// Conversation identifier, later passed to `fetch_conversation`.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time as Unix seconds; accepts a number or a timestamp string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time as Unix seconds; `None` when absent, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Archived flag; archived items are skipped unless the caller asks for them.
    #[serde(default)]
    is_archived: bool,
}
221
/// Body of the single-conversation response parsed by `fetch_conversation`.
#[derive(Debug, Deserialize)]
struct ConversationDetailResponse {
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time as Unix seconds; accepts a number or a timestamp string.
    #[serde(deserialize_with = "deserialize_timestamp")]
    create_time: f64,
    /// Last-update time as Unix seconds; `None` when absent, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    update_time: Option<f64>,
    /// Message nodes keyed by node id.
    mapping: std::collections::HashMap<String, MessageNode>,
    /// Not read here; presumably the id of the currently selected leaf node.
    #[serde(default)]
    current_node: Option<String>,
    /// Not read here; the caller-supplied id is used for the result instead.
    #[serde(default)]
    conversation_id: Option<String>,
    /// Model information; its `slug` is copied onto the conversation and its messages.
    #[serde(default)]
    model: Option<ModelInfo>,
}
237
/// One node of the `mapping` table in a conversation detail response.
#[derive(Debug, Deserialize)]
struct MessageNode {
    /// Node identifier (not read here; the map key is used instead).
    id: String,
    /// Parent node id, if any — not read here.
    #[serde(default)]
    parent: Option<String>,
    /// Child node ids — not read here.
    #[serde(default)]
    children: Vec<String>,
    /// Message carried by this node; nodes without one are ignored.
    message: Option<MessageContent>,
}
247
/// A single message inside a conversation node.
#[derive(Debug, Deserialize)]
struct MessageContent {
    /// Message identifier, copied to `CloudMessage::id`.
    id: String,
    /// Author of the message; its `role` decides whether the message is kept.
    author: AuthorInfo,
    /// Creation time as Unix seconds; `None` when absent, null or empty.
    #[serde(default, deserialize_with = "deserialize_optional_timestamp")]
    create_time: Option<f64>,
    /// Message payload from which the text is extracted.
    content: ContentParts,
    /// Free-form metadata — not read here.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
258
/// Author information attached to a message.
#[derive(Debug, Deserialize)]
struct AuthorInfo {
    /// Role string; `"system"` and `"tool"` messages are skipped by `fetch_conversation`.
    role: String,
    /// Optional author name — not read here.
    #[serde(default)]
    name: Option<String>,
    /// Free-form metadata — not read here.
    #[serde(default)]
    metadata: Option<serde_json::Value>,
}
267
/// Payload of a message.
///
/// When `parts` is present, its string entries are joined with newlines to
/// form the message text; `text` is only consulted when `parts` is absent.
#[derive(Debug, Deserialize)]
struct ContentParts {
    /// Content type tag supplied by the API — not read here.
    content_type: String,
    /// Content fragments; non-string entries are ignored during extraction.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain text fallback used when `parts` is missing.
    #[serde(default)]
    text: Option<String>,
}
276
/// Model information attached to a conversation detail response.
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model slug; the only field read here (used as the model name).
    slug: Option<String>,
    /// Not read here.
    max_tokens: Option<i32>,
    /// Not read here.
    title: Option<String>,
}
283
284impl CloudProvider for ChatGPTProvider {
285    fn name(&self) -> &'static str {
286        "ChatGPT"
287    }
288
289    fn api_base_url(&self) -> &str {
290        CHATGPT_API_BASE
291    }
292
293    fn is_authenticated(&self) -> bool {
294        self.api_key.is_some() || self.session_token.is_some() || self.access_token.is_some()
295    }
296
297    fn set_credentials(&mut self, api_key: Option<String>, session_token: Option<String>) {
298        self.api_key = api_key;
299        self.session_token = session_token;
300        self.access_token = None; // Clear cached access token when credentials change
301    }
302
303    fn list_conversations(&self, options: &FetchOptions) -> Result<Vec<CloudConversation>> {
304        // We need mutable self to get access token, so use interior mutability pattern
305        // For now, create a new instance - this is a workaround for the trait signature
306        let mut provider = ChatGPTProvider {
307            api_key: self.api_key.clone(),
308            session_token: self.session_token.clone(),
309            access_token: self.access_token.clone(),
310            client: None,
311        };
312
313        if !provider.is_authenticated() {
314            return Err(anyhow!(
315                "ChatGPT requires authentication. Provide a session token from browser cookies.\n\
316                Run 'chasm harvest scan --web' to check browser authentication status."
317            ));
318        }
319
320        // Try to get access token and list conversations
321        let auth_header = provider.get_auth_header()?;
322        let client = provider.ensure_client()?;
323
324        let limit = options.limit.unwrap_or(50).min(100);
325        let url = format!(
326            "{}/conversations?offset=0&limit={}&order=updated",
327            CHATGPT_API_BASE, limit
328        );
329
330        let response = client
331            .get(&url)
332            .header("Authorization", &auth_header)
333            .header("Accept", "application/json")
334            .header("Content-Type", "application/json")
335            .send()
336            .map_err(|e| anyhow!("Failed to fetch conversations: {}", e))?;
337
338        if !response.status().is_success() {
339            let status = response.status();
340            let body = response.text().unwrap_or_default();
341            return Err(anyhow!(
342                "ChatGPT API returned {}: {}. Session may have expired - log in to chatgpt.com in your browser.",
343                status,
344                body
345            ));
346        }
347
348        let list_response: ConversationListResponse = response
349            .json()
350            .map_err(|e| anyhow!("Failed to parse conversation list: {}", e))?;
351
352        // Debug: Found {} conversations (total: {})
353
354        let mut conversations = Vec::new();
355        for item in list_response.items {
356            // Skip archived if not requested
357            if item.is_archived && !options.include_archived {
358                continue;
359            }
360
361            // Apply date filters
362            let created = timestamp_to_datetime(item.create_time);
363            if let Some(after) = options.after {
364                if created < after {
365                    continue;
366                }
367            }
368            if let Some(before) = options.before {
369                if created > before {
370                    continue;
371                }
372            }
373
374            conversations.push(CloudConversation {
375                id: item.id,
376                title: item.title,
377                created_at: created,
378                updated_at: item.update_time.map(timestamp_to_datetime),
379                model: None,
380                messages: Vec::new(), // Will be populated by fetch_conversation
381                metadata: None,
382            });
383        }
384
385        Ok(conversations)
386    }
387
388    fn fetch_conversation(&self, id: &str) -> Result<CloudConversation> {
389        let mut provider = ChatGPTProvider {
390            api_key: self.api_key.clone(),
391            session_token: self.session_token.clone(),
392            access_token: self.access_token.clone(),
393            client: None,
394        };
395
396        if !provider.is_authenticated() {
397            return Err(anyhow!("ChatGPT requires authentication"));
398        }
399
400        let auth_header = provider.get_auth_header()?;
401        let client = provider.ensure_client()?;
402
403        let url = format!("{}/conversation/{}", CHATGPT_API_BASE, id);
404
405        let response = client
406            .get(&url)
407            .header("Authorization", &auth_header)
408            .header("Accept", "application/json")
409            .send()
410            .map_err(|e| anyhow!("Failed to fetch conversation {}: {}", id, e))?;
411
412        if !response.status().is_success() {
413            let status = response.status();
414            return Err(anyhow!(
415                "Failed to fetch conversation {}: HTTP {}",
416                id,
417                status
418            ));
419        }
420
421        let detail: ConversationDetailResponse = response
422            .json()
423            .map_err(|e| anyhow!("Failed to parse conversation {}: {}", id, e))?;
424
425        // Extract messages from the mapping tree
426        // Build a map of node IDs to their messages
427        let mut message_order: Vec<(String, CloudMessage)> = Vec::new();
428
429        for (node_id, node) in &detail.mapping {
430            if let Some(ref msg_content) = node.message {
431                let role = &msg_content.author.role;
432
433                // Skip system messages and tool messages
434                if role == "system" || role == "tool" {
435                    continue;
436                }
437
438                let content = msg_content
439                    .content
440                    .parts
441                    .as_ref()
442                    .map(|parts| {
443                        parts
444                            .iter()
445                            .filter_map(|p| p.as_str().map(String::from))
446                            .collect::<Vec<_>>()
447                            .join("\n")
448                    })
449                    .or_else(|| msg_content.content.text.clone())
450                    .unwrap_or_default();
451
452                if content.is_empty() {
453                    continue;
454                }
455
456                let cloud_message = CloudMessage {
457                    id: Some(msg_content.id.clone()),
458                    role: role.clone(),
459                    content,
460                    timestamp: msg_content.create_time.map(timestamp_to_datetime),
461                    model: detail.model.as_ref().and_then(|m| m.slug.clone()),
462                };
463
464                message_order.push((node_id.clone(), cloud_message));
465            }
466        }
467
468        // Sort messages by timestamp if available
469        message_order.sort_by(|a, b| {
470            let ts_a = a.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
471            let ts_b = b.1.timestamp.unwrap_or(DateTime::<Utc>::MIN_UTC);
472            ts_a.cmp(&ts_b)
473        });
474
475        let messages: Vec<CloudMessage> = message_order.into_iter().map(|(_, msg)| msg).collect();
476
477        Ok(CloudConversation {
478            id: id.to_string(),
479            title: detail.title,
480            created_at: timestamp_to_datetime(detail.create_time),
481            updated_at: detail.update_time.map(timestamp_to_datetime),
482            model: detail.model.and_then(|m| m.slug),
483            messages,
484            metadata: None,
485        })
486    }
487
488    fn api_key_env_var(&self) -> &'static str {
489        "OPENAI_API_KEY"
490    }
491}
492
493/// Parse a ChatGPT export file (JSON format from "Export data" feature)
494pub fn parse_chatgpt_export(json_data: &str) -> Result<Vec<CloudConversation>> {
495    let conversations: Vec<ChatGPTExportConversation> = serde_json::from_str(json_data)?;
496
497    Ok(conversations
498        .into_iter()
499        .map(|conv| CloudConversation {
500            id: conv.id,
501            title: conv.title,
502            created_at: timestamp_to_datetime(conv.create_time),
503            updated_at: conv.update_time.map(timestamp_to_datetime),
504            model: None,
505            messages: conv
506                .mapping
507                .into_iter()
508                .filter_map(|(_, node)| {
509                    node.message.map(|msg| {
510                        let content = msg
511                            .content
512                            .parts
513                            .map(|parts| {
514                                parts
515                                    .into_iter()
516                                    .filter_map(|p| p.as_str().map(String::from))
517                                    .collect::<Vec<_>>()
518                                    .join("\n")
519                            })
520                            .or(msg.content.text)
521                            .unwrap_or_default();
522
523                        CloudMessage {
524                            id: Some(msg.id),
525                            role: msg.author.role,
526                            content,
527                            timestamp: msg.create_time.map(timestamp_to_datetime),
528                            model: None,
529                        }
530                    })
531                })
532                .filter(|m| !m.content.is_empty() && m.role != "system")
533                .collect(),
534            metadata: None,
535        })
536        .collect())
537}
538
/// One conversation entry of a ChatGPT export file, as read by `parse_chatgpt_export`.
#[derive(Debug, Deserialize)]
struct ChatGPTExportConversation {
    /// Conversation identifier.
    id: String,
    /// Conversation title, if any.
    title: Option<String>,
    /// Creation time as Unix seconds (must be a JSON number here).
    create_time: f64,
    /// Last-update time as Unix seconds, if present.
    update_time: Option<f64>,
    /// Message nodes keyed by node id.
    mapping: std::collections::HashMap<String, ChatGPTExportNode>,
}
547
/// One node of an exported conversation's `mapping` table.
#[derive(Debug, Deserialize)]
struct ChatGPTExportNode {
    /// Message carried by this node; nodes without one are ignored.
    message: Option<ChatGPTExportMessage>,
}
552
/// A single message inside an exported conversation node.
#[derive(Debug, Deserialize)]
struct ChatGPTExportMessage {
    /// Message identifier, copied to `CloudMessage::id`.
    id: String,
    /// Author of the message; only its `role` is captured.
    author: ChatGPTExportAuthor,
    /// Creation time as Unix seconds, if present.
    create_time: Option<f64>,
    /// Message payload from which the text is extracted.
    content: ChatGPTExportContent,
}
560
/// Author of an exported message.
#[derive(Debug, Deserialize)]
struct ChatGPTExportAuthor {
    /// Role string; `"system"` messages are dropped by `parse_chatgpt_export`.
    role: String,
}
565
/// Payload of an exported message.
///
/// When `parts` is present, its string entries are joined with newlines to
/// form the message text; `text` is only consulted when `parts` is absent.
#[derive(Debug, Deserialize)]
struct ChatGPTExportContent {
    /// Content fragments; non-string entries are ignored during extraction.
    #[serde(default)]
    parts: Option<Vec<serde_json::Value>>,
    /// Plain text fallback used when `parts` is missing.
    #[serde(default)]
    text: Option<String>,
}
573
574fn timestamp_to_datetime(ts: f64) -> DateTime<Utc> {
575    use chrono::TimeZone;
576    Utc.timestamp_opt(ts as i64, ((ts.fract()) * 1_000_000_000.0) as u32)
577        .single()
578        .unwrap_or_else(Utc::now)
579}
580
#[cfg(test)]
mod tests {
    use super::*;

    /// A provider built with an API key reports its name and counts as authenticated.
    #[test]
    fn test_chatgpt_provider_new() {
        let with_key = ChatGPTProvider::new(Some(String::from("test-key")));
        assert_eq!(with_key.name(), "ChatGPT");
        assert!(with_key.is_authenticated());
    }

    /// A provider built without any credential is not authenticated.
    #[test]
    fn test_chatgpt_provider_unauthenticated() {
        let without_key = ChatGPTProvider::new(None);
        assert!(!without_key.is_authenticated());
    }

    /// The whole-second part of a fractional timestamp survives the conversion.
    #[test]
    fn test_timestamp_to_datetime() {
        let converted = timestamp_to_datetime(1700000000.123);
        assert_eq!(converted.timestamp(), 1700000000);
    }
}
604}