Skip to main content

codex_asr/
lib.rs

1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
6use base64::prelude::*;
7use reqwest::blocking::{multipart, Client};
8use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT};
9use serde::Deserialize;
10use serde_json::Value;
11use thiserror::Error;
12
/// Endpoint that a freshly created `CodexAsrClientBuilder` targets.
13pub const DEFAULT_ENDPOINT: &str = "https://chatgpt.com/backend-api/transcribe";
/// Default `originator` header value; also the product name in the default `User-Agent`.
14pub const DEFAULT_ORIGINATOR: &str = "Codex Desktop";
/// Version placed in the default `User-Agent` when no installed desktop version is detected.
15const DEFAULT_DESKTOP_VERSION: &str = "26.429.30905";
16
/// Errors produced while loading credentials, building the HTTP client, or
/// performing a transcription request.
17#[derive(Debug, Error)]
18pub enum CodexAsrError {
    /// A file (auth file or audio file) could not be read from disk.
19    #[error("failed to read {path}: {source}")]
20    ReadFile {
21        path: PathBuf,
22        #[source]
23        source: std::io::Error,
24    },
    /// JSON decoding failed. Used for the auth file, and also by
    /// `transcribe_bytes` with the placeholder path `<transcribe response>`
    /// when the response body cannot be decoded.
25    #[error("failed to parse {path}: {source}")]
26    ParseAuth {
27        path: PathBuf,
28        #[source]
29        source: serde_json::Error,
30    },
    /// The auth file has no non-blank `tokens.access_token`.
31    #[error("Codex auth at {path} does not contain a ChatGPT access token")]
32    MissingAccessToken { path: PathBuf },
    /// The auth file declares a mode other than `chatgpt` / `chatgpt_auth_tokens`.
33    #[error("Codex auth mode is {mode}, not ChatGPT token auth")]
34    UnsupportedAuthMode { mode: String },
    /// The bearer string passed in was empty after trimming and prefix removal.
35    #[error("invalid bearer token")]
36    InvalidBearer,
    /// A header value could not be constructed; converted automatically by `?` via `#[from]`.
37    #[error("invalid header value: {0}")]
38    InvalidHeader(#[from] reqwest::header::InvalidHeaderValue),
    /// The HTTP client (or its proxy configuration) could not be built.
39    #[error("failed to build HTTP client: {0}")]
40    BuildClient(#[source] reqwest::Error),
    /// The multipart upload part could not be built (e.g. an invalid MIME string).
41    #[error("failed to build multipart request: {0}")]
42    BuildMultipart(#[source] reqwest::Error),
    /// Sending the request or reading the response body failed.
43    #[error("transcribe request failed: {0}")]
44    Request(#[source] reqwest::Error),
    /// The server answered with a non-success status; `body` is a clipped copy of the response body.
45    #[error("transcribe request failed with HTTP {status}: {body}")]
46    Http { status: u16, body: String },
    /// The response decoded as JSON but had no `text` field.
47    #[error("transcribe response did not contain text")]
48    MissingText,
49}
50
/// Crate-wide result alias whose error type is `CodexAsrError`.
51pub type Result<T> = std::result::Result<T, CodexAsrError>;
52
/// Credentials used to authenticate transcription requests.
///
/// `Debug` is implemented by hand (instead of derived) so the access token is
/// never written to logs, panic messages, or the `Debug` output of types that
/// embed this struct.
#[derive(Clone)]
pub struct CodexAuth {
    /// Token sent in the `Authorization` header, stored without the `Bearer ` prefix.
    pub access_token: String,
    /// Value sent in the `ChatGPT-Account-Id` header, when known.
    pub account_id: Option<String>,
    /// Auth file the credentials were loaded from, if they came from a file.
    pub path: Option<PathBuf>,
}

impl std::fmt::Debug for CodexAuth {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CodexAuth")
            // The token is a secret: show only that the field exists.
            .field("access_token", &"<redacted>")
            .field("account_id", &self.account_id)
            .field("path", &self.path)
            .finish()
    }
}
59
60impl CodexAuth {
61    pub fn from_bearer(token: impl AsRef<str>, account_id: Option<String>) -> Result<Self> {
62        let access_token =
63            strip_bearer_prefix(token.as_ref()).ok_or(CodexAsrError::InvalidBearer)?;
64        let account_id = account_id.or_else(|| account_id_from_access_token(&access_token));
65        Ok(Self {
66            access_token,
67            account_id,
68            path: None,
69        })
70    }
71
72    pub fn from_codex_home() -> Result<Self> {
73        Self::from_auth_file(default_auth_file())
74    }
75
76    pub fn from_auth_file(path: impl AsRef<Path>) -> Result<Self> {
77        let path = path.as_ref().to_path_buf();
78        let raw = fs::read_to_string(&path).map_err(|source| CodexAsrError::ReadFile {
79            path: path.clone(),
80            source,
81        })?;
82        let parsed: AuthFile =
83            serde_json::from_str(&raw).map_err(|source| CodexAsrError::ParseAuth {
84                path: path.clone(),
85                source,
86            })?;
87        let mode = parsed.auth_mode.or(parsed.auth_mode_camel);
88        if let Some(mode) = mode {
89            if mode != "chatgpt" && mode != "chatgpt_auth_tokens" {
90                return Err(CodexAsrError::UnsupportedAuthMode { mode });
91            }
92        }
93        let tokens = parsed.tokens;
94        let token = tokens
95            .as_ref()
96            .and_then(|tokens| tokens.access_token.clone())
97            .filter(|token| !token.trim().is_empty())
98            .ok_or_else(|| CodexAsrError::MissingAccessToken { path: path.clone() })?;
99        let account_id = tokens
100            .and_then(|tokens| tokens.account_id)
101            .or_else(|| account_id_from_access_token(&token));
102        Ok(Self {
103            access_token: token,
104            account_id,
105            path: Some(path),
106        })
107    }
108}
109
/// Blocking client that uploads audio to a transcription endpoint.
110#[derive(Debug, Clone)]
111pub struct CodexAsrClient {
    /// URL that transcription requests are POSTed to.
112    endpoint: String,
    /// Credentials used to build the `Authorization` and account headers.
113    auth: CodexAuth,
    /// Underlying blocking HTTP client.
114    http: Client,
    /// Value of the `originator` request header.
115    originator: String,
    /// Value of the `User-Agent` request header.
116    user_agent: String,
117}
118
119impl CodexAsrClient {
120    pub fn builder(auth: CodexAuth) -> CodexAsrClientBuilder {
121        CodexAsrClientBuilder::new(auth)
122    }
123
124    pub fn from_codex_home() -> Result<Self> {
125        Self::builder(CodexAuth::from_codex_home()?).build()
126    }
127
128    pub fn transcribe_file(
129        &self,
130        path: impl AsRef<Path>,
131        options: TranscribeOptions,
132    ) -> Result<Transcription> {
133        let path = path.as_ref();
134        let audio = fs::read(path).map_err(|source| CodexAsrError::ReadFile {
135            path: path.to_path_buf(),
136            source,
137        })?;
138        let content_type = options
139            .content_type
140            .unwrap_or_else(|| infer_content_type(path).to_string());
141        let filename = options
142            .filename
143            .unwrap_or_else(|| upload_filename(path, &content_type));
144        self.transcribe_bytes(audio, &filename, &content_type, options.language)
145    }
146
147    pub fn transcribe_bytes(
148        &self,
149        audio: Vec<u8>,
150        filename: &str,
151        content_type: &str,
152        language: Option<String>,
153    ) -> Result<Transcription> {
154        let part = multipart::Part::bytes(audio)
155            .file_name(filename.replace('"', ""))
156            .mime_str(content_type)
157            .map_err(CodexAsrError::BuildMultipart)?;
158        let mut form = multipart::Form::new().part("file", part);
159        if let Some(language) = language {
160            form = form.text("language", language);
161        }
162
163        let response = self
164            .http
165            .post(&self.endpoint)
166            .headers(self.auth_headers()?)
167            .multipart(form)
168            .send()
169            .map_err(CodexAsrError::Request)?;
170        let status = response.status();
171        let body = response.text().map_err(CodexAsrError::Request)?;
172        if !status.is_success() {
173            return Err(CodexAsrError::Http {
174                status: status.as_u16(),
175                body: clip_response_body(&body),
176            });
177        }
178        let parsed: TranscribeResponse =
179            serde_json::from_str(&body).map_err(|source| CodexAsrError::ParseAuth {
180                path: PathBuf::from("<transcribe response>"),
181                source,
182            })?;
183        let text = parsed.text.ok_or(CodexAsrError::MissingText)?;
184        Ok(Transcription { text })
185    }
186
187    fn auth_headers(&self) -> Result<HeaderMap> {
188        let mut headers = HeaderMap::new();
189        headers.insert(
190            AUTHORIZATION,
191            HeaderValue::from_str(&format!("Bearer {}", self.auth.access_token))?,
192        );
193        headers.insert("originator", HeaderValue::from_str(&self.originator)?);
194        headers.insert(USER_AGENT, HeaderValue::from_str(&self.user_agent)?);
195        if let Some(account_id) = &self.auth.account_id {
196            headers.insert("ChatGPT-Account-Id", HeaderValue::from_str(account_id)?);
197        }
198        Ok(headers)
199    }
200}
201
/// Builder collecting the settings used to construct a `CodexAsrClient`.
202#[derive(Debug, Clone)]
203pub struct CodexAsrClientBuilder {
    /// Credentials handed to the built client.
204    auth: CodexAuth,
    /// Transcription endpoint URL.
205    endpoint: String,
    /// Proxy URL applied to HTTPS traffic, if any.
206    proxy: Option<String>,
    /// Value of the `originator` request header.
207    originator: String,
    /// Value of the `User-Agent` request header.
208    user_agent: String,
209}
210
211impl CodexAsrClientBuilder {
212    pub fn new(auth: CodexAuth) -> Self {
213        let version =
214            detect_codex_desktop_version().unwrap_or_else(|| DEFAULT_DESKTOP_VERSION.to_string());
215        Self {
216            auth,
217            endpoint: DEFAULT_ENDPOINT.to_string(),
218            proxy: resolve_proxy(None),
219            originator: DEFAULT_ORIGINATOR.to_string(),
220            user_agent: format!(
221                "{DEFAULT_ORIGINATOR}/{version} ({}; {})",
222                env::consts::OS,
223                env::consts::ARCH
224            ),
225        }
226    }
227
228    pub fn endpoint(mut self, endpoint: impl Into<String>) -> Self {
229        self.endpoint = endpoint.into();
230        self
231    }
232
233    pub fn proxy(mut self, proxy: Option<String>) -> Self {
234        self.proxy = proxy;
235        self
236    }
237
238    pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
239        self.user_agent = user_agent.into();
240        self
241    }
242
243    pub fn build(self) -> Result<CodexAsrClient> {
244        let mut builder = Client::builder();
245        if let Some(proxy) = self.proxy {
246            builder =
247                builder.proxy(reqwest::Proxy::https(&proxy).map_err(CodexAsrError::BuildClient)?);
248        }
249        let http = builder.build().map_err(CodexAsrError::BuildClient)?;
250        Ok(CodexAsrClient {
251            endpoint: self.endpoint,
252            auth: self.auth,
253            http,
254            originator: self.originator,
255            user_agent: self.user_agent,
256        })
257    }
258}
259
/// Optional overrides for `CodexAsrClient::transcribe_file`.
260#[derive(Debug, Clone, Default)]
261pub struct TranscribeOptions {
    /// Sent as the `language` form field when set.
262    pub language: Option<String>,
    /// MIME type of the upload; inferred from the file extension when `None`.
263    pub content_type: Option<String>,
    /// File name reported in the upload; derived from the path when `None`.
264    pub filename: Option<String>,
265}
266
/// Result of a successful transcription request.
267#[derive(Debug, Clone, PartialEq, Eq)]
268pub struct Transcription {
    /// Contents of the response's `text` field.
269    pub text: String,
270}
271
/// Subset of the JSON auth file that this crate reads.
272#[derive(Debug, Deserialize)]
273struct AuthFile {
    /// Auth mode under the snake_case key `auth_mode`.
274    auth_mode: Option<String>,
    /// Same setting under the camelCase key `authMode`; consulted when `auth_mode` is absent.
275    #[serde(rename = "authMode")]
276    auth_mode_camel: Option<String>,
    /// Stored token material, if present.
277    tokens: Option<AuthTokens>,
278}
279
/// The `tokens` object of the auth file.
280#[derive(Debug, Deserialize)]
281struct AuthTokens {
    /// Access token used as the bearer credential.
282    access_token: Option<String>,
    /// Account id; when absent it is looked up in the access token's payload instead.
283    account_id: Option<String>,
284}
285
/// JSON shape read from a successful transcription response.
286#[derive(Debug, Deserialize)]
287struct TranscribeResponse {
    /// Transcribed text; its absence is reported as `CodexAsrError::MissingText`.
288    text: Option<String>,
289}
290
/// Maps a path's extension (ASCII case-insensitive) to an audio MIME type.
///
/// Paths with no extension, a non-UTF-8 extension, or an unrecognised one
/// yield `application/octet-stream`.
pub fn infer_content_type(path: impl AsRef<Path>) -> &'static str {
    const BY_EXTENSION: &[(&str, &str)] = &[
        ("wav", "audio/wav"),
        ("wave", "audio/wav"),
        ("webm", "audio/webm"),
        ("mp3", "audio/mpeg"),
        ("m4a", "audio/mp4"),
        ("mp4", "audio/mp4"),
        ("ogg", "audio/ogg"),
        ("oga", "audio/ogg"),
        ("flac", "audio/flac"),
    ];

    let extension = path
        .as_ref()
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");
    BY_EXTENSION
        .iter()
        .find(|(known, _)| extension.eq_ignore_ascii_case(known))
        .map_or("application/octet-stream", |&(_, mime)| mime)
}
309
310fn upload_filename(path: &Path, content_type: &str) -> String {
311    let original = path
312        .file_name()
313        .and_then(|name| name.to_str())
314        .unwrap_or("codex")
315        .to_string();
316    if infer_content_type(path) != "application/octet-stream" {
317        return original;
318    }
319    let Some(extension) = extension_for_content_type(content_type) else {
320        return original;
321    };
322    let stem = path
323        .file_stem()
324        .and_then(|stem| stem.to_str())
325        .filter(|stem| !stem.is_empty() && !stem.starts_with('.'))
326        .unwrap_or("codex");
327    format!("{stem}.{extension}")
328}
329
/// Returns the file extension conventionally used for an audio MIME type.
///
/// Anything after the first `;` (MIME parameters) is ignored, and the
/// comparison is whitespace-trimmed and ASCII case-insensitive. Unknown
/// types yield `None`.
fn extension_for_content_type(content_type: &str) -> Option<&'static str> {
    let essence = content_type
        .split_once(';')
        .map_or(content_type, |(before_params, _)| before_params)
        .trim()
        .to_ascii_lowercase();

    let extension = match essence.as_str() {
        "audio/wav" | "audio/x-wav" | "audio/wave" => "wav",
        "audio/mpeg" | "audio/mp3" => "mp3",
        "audio/mp4" | "audio/m4a" | "audio/x-m4a" => "m4a",
        "audio/flac" | "audio/x-flac" => "flac",
        "audio/ogg" => "ogg",
        "audio/webm" => "webm",
        _ => return None,
    };
    Some(extension)
}
348
349pub fn default_auth_file() -> PathBuf {
350    let codex_home = env::var_os("CODEX_HOME")
351        .map(PathBuf::from)
352        .unwrap_or_else(|| home_dir().join(".codex"));
353    codex_home.join("auth.json")
354}
355
356pub fn resolve_proxy(explicit_proxy: Option<&str>) -> Option<String> {
357    first_non_empty([
358        explicit_proxy.map(str::to_string),
359        env::var("CODEX_ASR_PROXY").ok(),
360        env::var("CODEX_VOICE_PROXY").ok(),
361        env::var("HTTPS_PROXY").ok(),
362        env::var("https_proxy").ok(),
363        env::var("ALL_PROXY").ok(),
364        env::var("all_proxy").ok(),
365        system_https_proxy(),
366    ])
367}
368
/// Returns the first candidate that is present and non-empty after trimming,
/// in its trimmed form. Later candidates are not consumed once one matches.
fn first_non_empty(values: impl IntoIterator<Item = Option<String>>) -> Option<String> {
    for candidate in values {
        let Some(raw) = candidate else {
            continue;
        };
        let trimmed = raw.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    None
}
376
377fn system_https_proxy() -> Option<String> {
378    if cfg!(target_os = "macos") {
379        let output = Command::new("scutil").arg("--proxy").output().ok()?;
380        if !output.status.success() {
381            return None;
382        }
383        return parse_scutil_https_proxy(&String::from_utf8_lossy(&output.stdout));
384    }
385    None
386}
387
/// Extracts the HTTPS proxy from `scutil --proxy` style `key : value` output.
///
/// Returns `http://<host>:<port>` when `HTTPSEnable` is `1` and both
/// `HTTPSProxy` and `HTTPSPort` have non-empty values; otherwise `None`.
/// A host containing `:` (an IPv6 literal) is wrapped in brackets so the
/// result stays a valid URL.
fn parse_scutil_https_proxy(output: &str) -> Option<String> {
    let mut enabled = false;
    let mut host = None;
    let mut port = None;
    for line in output.lines() {
        // Split at the first ':' only, so values that contain ':' stay intact.
        let Some((key, value)) = line.split_once(':') else {
            continue;
        };
        let value = value.trim();
        match key.trim() {
            "HTTPSEnable" => enabled = value == "1",
            "HTTPSProxy" => host = Some(value),
            "HTTPSPort" => port = Some(value),
            _ => {}
        }
    }
    if !enabled {
        return None;
    }

    // An empty host or port would produce a malformed URL such as `http://:80`.
    let host = host.filter(|host| !host.is_empty())?;
    let port = port.filter(|port| !port.is_empty())?;
    if host.contains(':') && !host.starts_with('[') {
        Some(format!("http://[{host}]:{port}"))
    } else {
        Some(format!("http://{host}:{port}"))
    }
}
409
/// Normalises a bearer credential: trims whitespace and removes an optional
/// leading `Bearer ` scheme.
///
/// The scheme is matched ASCII case-insensitively (`Bearer`, `bearer`,
/// `BEARER`, ...). Returns `None` when nothing but whitespace remains.
fn strip_bearer_prefix(token: &str) -> Option<String> {
    const SCHEME: &str = "bearer ";

    let trimmed = token.trim();
    // `get` returns `None` when the input is shorter than the scheme or when
    // the cut would fall inside a multi-byte character, so slicing is safe.
    let credential = match trimmed.get(..SCHEME.len()) {
        Some(prefix) if prefix.eq_ignore_ascii_case(SCHEME) => &trimmed[SCHEME.len()..],
        _ => trimmed,
    }
    .trim();
    (!credential.is_empty()).then(|| credential.to_string())
}
419
420fn account_id_from_access_token(access_token: &str) -> Option<String> {
421    let payload = access_token.split('.').nth(1)?;
422    let decoded = BASE64_URL_SAFE_NO_PAD.decode(payload).ok()?;
423    let value: Value = serde_json::from_slice(&decoded).ok()?;
424    value
425        .get("https://api.openai.com/auth")?
426        .get("chatgpt_account_id")?
427        .as_str()
428        .filter(|value| !value.is_empty())
429        .map(ToOwned::to_owned)
430}
431
/// Reads the installed desktop app's version from its `Info.plist`.
///
/// Returns the text of the first `<string>` element that follows the
/// `CFBundleShortVersionString` key, or `None` when the file cannot be read
/// or does not have that shape.
fn detect_codex_desktop_version() -> Option<String> {
    let plist = fs::read_to_string("/Applications/Codex.app/Contents/Info.plist").ok()?;
    let (_, after_key) = plist.split_once("<key>CFBundleShortVersionString</key>")?;
    let (_, after_open_tag) = after_key.split_once("<string>")?;
    let (version, _) = after_open_tag.split_once("</string>")?;
    Some(version.to_string())
}
440
/// Returns the `HOME` environment variable as a path, or `.` when it is unset.
fn home_dir() -> PathBuf {
    match env::var_os("HOME") {
        Some(home) => PathBuf::from(home),
        None => PathBuf::from("."),
    }
}
446
/// Produces a compact, bounded copy of a response body for error messages.
///
/// Runs of whitespace are collapsed to single spaces and the result is cut to
/// at most 300 bytes. The cut is moved back to the nearest character boundary,
/// because `String::truncate` panics when asked to split a multi-byte
/// character.
fn clip_response_body(body: &str) -> String {
    const MAX_BYTES: usize = 300;

    let mut clipped = body.split_whitespace().collect::<Vec<_>>().join(" ");
    if clipped.len() > MAX_BYTES {
        let mut cut = MAX_BYTES;
        // Index 0 is always a boundary, so this loop terminates.
        while !clipped.is_char_boundary(cut) {
            cut -= 1;
        }
        clipped.truncate(cut);
    }
    clipped
}
452
// Unit tests for the pure helpers in this file; none of them touch the network.
453#[cfg(test)]
454mod tests {
455    use super::*;
456
    // Known extensions map to audio MIME types; anything else falls back to the generic binary type.
457    #[test]
458    fn content_type_is_inferred_from_extension() {
459        assert_eq!(infer_content_type("voice.wav"), "audio/wav");
460        assert_eq!(infer_content_type("voice.webm"), "audio/webm");
461        assert_eq!(
462            infer_content_type("voice.unknown"),
463            "application/octet-stream"
464        );
465    }
466
    // The upload name is only rewritten when the path's extension is unknown
    // AND the content type has a known extension.
467    #[test]
468    fn upload_filename_uses_content_type_when_extension_is_unknown() {
469        assert_eq!(
470            upload_filename(Path::new("voice"), "audio/wav"),
471            "voice.wav"
472        );
473        assert_eq!(
474            upload_filename(Path::new("voice.bin"), "audio/webm"),
475            "voice.webm"
476        );
477        assert_eq!(
478            upload_filename(Path::new("voice.wav"), "audio/webm"),
479            "voice.wav"
480        );
481        assert_eq!(
482            upload_filename(Path::new("voice.bin"), "application/octet-stream"),
483            "voice.bin"
484        );
485    }
486
    // Tokens are accepted with or without the `Bearer ` scheme; blank input is rejected.
487    #[test]
488    fn bearer_prefix_is_optional() {
489        assert_eq!(
490            strip_bearer_prefix("Bearer abc.def").as_deref(),
491            Some("abc.def")
492        );
493        assert_eq!(strip_bearer_prefix("abc.def").as_deref(), Some("abc.def"));
494        assert_eq!(strip_bearer_prefix("  ").as_deref(), None);
495    }
496
    // Builds a JWT-shaped token whose payload carries the account id claim.
497    #[test]
498    fn account_id_can_be_read_from_chatgpt_jwt_payload() {
499        let payload = BASE64_URL_SAFE_NO_PAD
500            .encode(r#"{"https://api.openai.com/auth":{"chatgpt_account_id":"acct_123"}}"#);
501        let token = format!("header.{payload}.sig");
502        assert_eq!(
503            account_id_from_access_token(&token).as_deref(),
504            Some("acct_123")
505        );
506    }
507
    // Feeds a sample of `scutil --proxy` style output to the parser.
508    #[test]
509    fn macos_https_proxy_is_parsed() {
510        let output = r#"
511<dictionary> {
512  HTTPSEnable : 1
513  HTTPSProxy : 127.0.0.1
514  HTTPSPort : 7892
515}
516"#;
517        assert_eq!(
518            parse_scutil_https_proxy(output).as_deref(),
519            Some("http://127.0.0.1:7892")
520        );
521    }
522}