1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
6use base64::prelude::*;
7use reqwest::blocking::{multipart, Client};
8use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT};
9use serde::Deserialize;
10use serde_json::Value;
11use thiserror::Error;
12
/// URL that transcription requests are posted to unless the builder overrides it.
pub const DEFAULT_ENDPOINT: &str = "https://chatgpt.com/backend-api/transcribe";

/// Value sent in the `originator` header; also the product name of the default user agent.
pub const DEFAULT_ORIGINATOR: &str = "Codex Desktop";

/// Version placed in the default user agent when no installed desktop version can be detected.
const DEFAULT_DESKTOP_VERSION: &str = "26.429.30905";
16
17#[derive(Debug, Error)]
18pub enum CodexAsrError {
19 #[error("failed to read {path}: {source}")]
20 ReadFile {
21 path: PathBuf,
22 #[source]
23 source: std::io::Error,
24 },
25 #[error("failed to parse {path}: {source}")]
26 ParseAuth {
27 path: PathBuf,
28 #[source]
29 source: serde_json::Error,
30 },
31 #[error("Codex auth at {path} does not contain a ChatGPT access token")]
32 MissingAccessToken { path: PathBuf },
33 #[error("Codex auth mode is {mode}, not ChatGPT token auth")]
34 UnsupportedAuthMode { mode: String },
35 #[error("invalid bearer token")]
36 InvalidBearer,
37 #[error("invalid header value: {0}")]
38 InvalidHeader(#[from] reqwest::header::InvalidHeaderValue),
39 #[error("failed to build HTTP client: {0}")]
40 BuildClient(#[source] reqwest::Error),
41 #[error("failed to build multipart request: {0}")]
42 BuildMultipart(#[source] reqwest::Error),
43 #[error("transcribe request failed: {0}")]
44 Request(#[source] reqwest::Error),
45 #[error("transcribe request failed with HTTP {status}: {body}")]
46 Http { status: u16, body: String },
47 #[error("transcribe response did not contain text")]
48 MissingText,
49}
50
51pub type Result<T> = std::result::Result<T, CodexAsrError>;
52
/// Credentials used to authorize transcription requests.
///
/// `Debug` is implemented by hand so the access token never ends up in logs or
/// panic messages; every other field is printed as usual.
#[derive(Clone)]
pub struct CodexAuth {
    /// Bearer token sent in the `Authorization` header (stored without the `Bearer ` prefix).
    pub access_token: String,
    /// Account identifier sent in the `ChatGPT-Account-Id` header when present.
    pub account_id: Option<String>,
    /// Auth file the credentials were loaded from, if they came from disk.
    pub path: Option<PathBuf>,
}

impl std::fmt::Debug for CodexAuth {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CodexAuth")
            // The token is a secret: show that it exists, never its value.
            .field("access_token", &"<redacted>")
            .field("account_id", &self.account_id)
            .field("path", &self.path)
            .finish()
    }
}
59
60impl CodexAuth {
61 pub fn from_bearer(token: impl AsRef<str>, account_id: Option<String>) -> Result<Self> {
62 let access_token =
63 strip_bearer_prefix(token.as_ref()).ok_or(CodexAsrError::InvalidBearer)?;
64 let account_id = account_id.or_else(|| account_id_from_access_token(&access_token));
65 Ok(Self {
66 access_token,
67 account_id,
68 path: None,
69 })
70 }
71
72 pub fn from_codex_home() -> Result<Self> {
73 Self::from_auth_file(default_auth_file())
74 }
75
76 pub fn from_auth_file(path: impl AsRef<Path>) -> Result<Self> {
77 let path = path.as_ref().to_path_buf();
78 let raw = fs::read_to_string(&path).map_err(|source| CodexAsrError::ReadFile {
79 path: path.clone(),
80 source,
81 })?;
82 let parsed: AuthFile =
83 serde_json::from_str(&raw).map_err(|source| CodexAsrError::ParseAuth {
84 path: path.clone(),
85 source,
86 })?;
87 let mode = parsed.auth_mode.or(parsed.auth_mode_camel);
88 if let Some(mode) = mode {
89 if mode != "chatgpt" && mode != "chatgpt_auth_tokens" {
90 return Err(CodexAsrError::UnsupportedAuthMode { mode });
91 }
92 }
93 let tokens = parsed.tokens;
94 let token = tokens
95 .as_ref()
96 .and_then(|tokens| tokens.access_token.clone())
97 .filter(|token| !token.trim().is_empty())
98 .ok_or_else(|| CodexAsrError::MissingAccessToken { path: path.clone() })?;
99 let account_id = tokens
100 .and_then(|tokens| tokens.account_id)
101 .or_else(|| account_id_from_access_token(&token));
102 Ok(Self {
103 access_token: token,
104 account_id,
105 path: Some(path),
106 })
107 }
108}
109
110#[derive(Debug, Clone)]
111pub struct CodexAsrClient {
112 endpoint: String,
113 auth: CodexAuth,
114 http: Client,
115 originator: String,
116 user_agent: String,
117}
118
119impl CodexAsrClient {
120 pub fn builder(auth: CodexAuth) -> CodexAsrClientBuilder {
121 CodexAsrClientBuilder::new(auth)
122 }
123
124 pub fn from_codex_home() -> Result<Self> {
125 Self::builder(CodexAuth::from_codex_home()?).build()
126 }
127
128 pub fn transcribe_file(
129 &self,
130 path: impl AsRef<Path>,
131 options: TranscribeOptions,
132 ) -> Result<Transcription> {
133 let path = path.as_ref();
134 let audio = fs::read(path).map_err(|source| CodexAsrError::ReadFile {
135 path: path.to_path_buf(),
136 source,
137 })?;
138 let content_type = options
139 .content_type
140 .unwrap_or_else(|| infer_content_type(path).to_string());
141 let filename = options
142 .filename
143 .unwrap_or_else(|| upload_filename(path, &content_type));
144 self.transcribe_bytes(audio, &filename, &content_type, options.language)
145 }
146
147 pub fn transcribe_bytes(
148 &self,
149 audio: Vec<u8>,
150 filename: &str,
151 content_type: &str,
152 language: Option<String>,
153 ) -> Result<Transcription> {
154 let part = multipart::Part::bytes(audio)
155 .file_name(filename.replace('"', ""))
156 .mime_str(content_type)
157 .map_err(CodexAsrError::BuildMultipart)?;
158 let mut form = multipart::Form::new().part("file", part);
159 if let Some(language) = language {
160 form = form.text("language", language);
161 }
162
163 let response = self
164 .http
165 .post(&self.endpoint)
166 .headers(self.auth_headers()?)
167 .multipart(form)
168 .send()
169 .map_err(CodexAsrError::Request)?;
170 let status = response.status();
171 let body = response.text().map_err(CodexAsrError::Request)?;
172 if !status.is_success() {
173 return Err(CodexAsrError::Http {
174 status: status.as_u16(),
175 body: clip_response_body(&body),
176 });
177 }
178 let parsed: TranscribeResponse =
179 serde_json::from_str(&body).map_err(|source| CodexAsrError::ParseAuth {
180 path: PathBuf::from("<transcribe response>"),
181 source,
182 })?;
183 let text = parsed.text.ok_or(CodexAsrError::MissingText)?;
184 Ok(Transcription { text })
185 }
186
187 fn auth_headers(&self) -> Result<HeaderMap> {
188 let mut headers = HeaderMap::new();
189 headers.insert(
190 AUTHORIZATION,
191 HeaderValue::from_str(&format!("Bearer {}", self.auth.access_token))?,
192 );
193 headers.insert("originator", HeaderValue::from_str(&self.originator)?);
194 headers.insert(USER_AGENT, HeaderValue::from_str(&self.user_agent)?);
195 if let Some(account_id) = &self.auth.account_id {
196 headers.insert("ChatGPT-Account-Id", HeaderValue::from_str(account_id)?);
197 }
198 Ok(headers)
199 }
200}
201
202#[derive(Debug, Clone)]
203pub struct CodexAsrClientBuilder {
204 auth: CodexAuth,
205 endpoint: String,
206 proxy: Option<String>,
207 originator: String,
208 user_agent: String,
209}
210
211impl CodexAsrClientBuilder {
212 pub fn new(auth: CodexAuth) -> Self {
213 let version =
214 detect_codex_desktop_version().unwrap_or_else(|| DEFAULT_DESKTOP_VERSION.to_string());
215 Self {
216 auth,
217 endpoint: DEFAULT_ENDPOINT.to_string(),
218 proxy: resolve_proxy(None),
219 originator: DEFAULT_ORIGINATOR.to_string(),
220 user_agent: format!(
221 "{DEFAULT_ORIGINATOR}/{version} ({}; {})",
222 env::consts::OS,
223 env::consts::ARCH
224 ),
225 }
226 }
227
228 pub fn endpoint(mut self, endpoint: impl Into<String>) -> Self {
229 self.endpoint = endpoint.into();
230 self
231 }
232
233 pub fn proxy(mut self, proxy: Option<String>) -> Self {
234 self.proxy = proxy;
235 self
236 }
237
238 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
239 self.user_agent = user_agent.into();
240 self
241 }
242
243 pub fn build(self) -> Result<CodexAsrClient> {
244 let mut builder = Client::builder();
245 if let Some(proxy) = self.proxy {
246 builder =
247 builder.proxy(reqwest::Proxy::https(&proxy).map_err(CodexAsrError::BuildClient)?);
248 }
249 let http = builder.build().map_err(CodexAsrError::BuildClient)?;
250 Ok(CodexAsrClient {
251 endpoint: self.endpoint,
252 auth: self.auth,
253 http,
254 originator: self.originator,
255 user_agent: self.user_agent,
256 })
257 }
258}
259
/// Optional overrides for a file transcription; the default leaves every field unset.
#[derive(Debug, Clone, Default)]
pub struct TranscribeOptions {
    /// Sent as the `language` form field when set.
    pub language: Option<String>,
    /// MIME type of the upload; when unset it is inferred from the file extension.
    pub content_type: Option<String>,
    /// Filename reported for the upload; when unset it is derived from the path.
    pub filename: Option<String>,
}
266
/// Successful result of a transcription request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Transcription {
    /// Text taken from the `text` field of the response.
    pub text: String,
}
271
272#[derive(Debug, Deserialize)]
273struct AuthFile {
274 auth_mode: Option<String>,
275 #[serde(rename = "authMode")]
276 auth_mode_camel: Option<String>,
277 tokens: Option<AuthTokens>,
278}
279
280#[derive(Debug, Deserialize)]
281struct AuthTokens {
282 access_token: Option<String>,
283 account_id: Option<String>,
284}
285
286#[derive(Debug, Deserialize)]
287struct TranscribeResponse {
288 text: Option<String>,
289}
290
/// Maps a path's extension (case-insensitively) to an audio MIME type.
///
/// Paths without an extension, with a non-UTF-8 extension, or with an unrecognized
/// one yield `application/octet-stream`.
pub fn infer_content_type(path: impl AsRef<Path>) -> &'static str {
    let extension = path
        .as_ref()
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        Some("wav" | "wave") => "audio/wav",
        Some("webm") => "audio/webm",
        Some("mp3") => "audio/mpeg",
        Some("m4a" | "mp4") => "audio/mp4",
        Some("ogg" | "oga") => "audio/ogg",
        Some("flac") => "audio/flac",
        _ => "application/octet-stream",
    }
}
309
310fn upload_filename(path: &Path, content_type: &str) -> String {
311 let original = path
312 .file_name()
313 .and_then(|name| name.to_str())
314 .unwrap_or("codex")
315 .to_string();
316 if infer_content_type(path) != "application/octet-stream" {
317 return original;
318 }
319 let Some(extension) = extension_for_content_type(content_type) else {
320 return original;
321 };
322 let stem = path
323 .file_stem()
324 .and_then(|stem| stem.to_str())
325 .filter(|stem| !stem.is_empty() && !stem.starts_with('.'))
326 .unwrap_or("codex");
327 format!("{stem}.{extension}")
328}
329
/// Returns the file extension conventionally used for an audio MIME type.
///
/// Parameters after `;` are ignored and matching is case-insensitive. Unknown types
/// yield `None`.
fn extension_for_content_type(content_type: &str) -> Option<&'static str> {
    // `split` always yields at least one piece, so the fallback is never taken.
    let essence = content_type.split(';').next().unwrap_or("");
    let essence = essence.trim().to_ascii_lowercase();

    let extension = match essence.as_str() {
        "audio/wav" | "audio/x-wav" | "audio/wave" => "wav",
        "audio/mpeg" | "audio/mp3" => "mp3",
        "audio/mp4" | "audio/m4a" | "audio/x-m4a" => "m4a",
        "audio/flac" | "audio/x-flac" => "flac",
        "audio/ogg" => "ogg",
        "audio/webm" => "webm",
        _ => return None,
    };
    Some(extension)
}
348
349pub fn default_auth_file() -> PathBuf {
350 let codex_home = env::var_os("CODEX_HOME")
351 .map(PathBuf::from)
352 .unwrap_or_else(|| home_dir().join(".codex"));
353 codex_home.join("auth.json")
354}
355
356pub fn resolve_proxy(explicit_proxy: Option<&str>) -> Option<String> {
357 first_non_empty([
358 explicit_proxy.map(str::to_string),
359 env::var("CODEX_ASR_PROXY").ok(),
360 env::var("CODEX_VOICE_PROXY").ok(),
361 env::var("HTTPS_PROXY").ok(),
362 env::var("https_proxy").ok(),
363 env::var("ALL_PROXY").ok(),
364 env::var("all_proxy").ok(),
365 system_https_proxy(),
366 ])
367}
368
/// Returns the first present value that is non-empty after trimming, in trimmed form.
///
/// `None` entries and blank strings are skipped. Iteration stops at the first match.
fn first_non_empty(values: impl IntoIterator<Item = Option<String>>) -> Option<String> {
    for candidate in values.into_iter().flatten() {
        let trimmed = candidate.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    None
}
376
377fn system_https_proxy() -> Option<String> {
378 if cfg!(target_os = "macos") {
379 let output = Command::new("scutil").arg("--proxy").output().ok()?;
380 if !output.status.success() {
381 return None;
382 }
383 return parse_scutil_https_proxy(&String::from_utf8_lossy(&output.stdout));
384 }
385 None
386}
387
/// Extracts the HTTPS proxy from `scutil --proxy` style output as an `http://host:port` URL.
///
/// Each line is read as `key : value`. Only `HTTPSEnable`, `HTTPSProxy` and
/// `HTTPSPort` are used, and a later occurrence of a key overrides an earlier one.
///
/// Returns `None` unless the proxy is enabled (`HTTPSEnable : 1`) and both host and
/// port are present and non-empty, so a half-configured entry never produces a
/// malformed URL such as `http://:7892`. A host containing `:` (an IPv6 literal) is
/// wrapped in brackets, as URL syntax requires.
fn parse_scutil_https_proxy(output: &str) -> Option<String> {
    let mut enabled = false;
    let mut host = None;
    let mut port = None;

    for line in output.lines() {
        // Split on the first colon only, so values that contain colons stay intact.
        let Some((key, value)) = line.split_once(':') else {
            continue;
        };
        let value = value.trim();
        match key.trim() {
            "HTTPSEnable" => enabled = value == "1",
            "HTTPSProxy" => host = Some(value),
            "HTTPSPort" => port = Some(value),
            _ => {}
        }
    }

    if !enabled {
        return None;
    }
    let host = host.filter(|host| !host.is_empty())?;
    let port = port.filter(|port| !port.is_empty())?;

    if host.contains(':') && !host.starts_with('[') {
        Some(format!("http://[{host}]:{port}"))
    } else {
        Some(format!("http://{host}:{port}"))
    }
}
409
/// Normalizes a bearer credential to the bare token.
///
/// Surrounding whitespace is removed, and a leading `Bearer` scheme followed by
/// whitespace is dropped. The scheme is matched case-insensitively (`Bearer`,
/// `bearer`, `BEARER`, ...). A value that merely starts with those letters, such as
/// `bearertoken`, is left untouched.
///
/// Returns `None` when nothing remains: blank input, or the scheme with no token.
fn strip_bearer_prefix(token: &str) -> Option<String> {
    const SCHEME: &str = "Bearer";

    let trimmed = token.trim();
    // `get` returns `None` for inputs that are too short or that would be split inside
    // a multi-byte character, so the slicing below cannot panic.
    let token = match trimmed.get(..SCHEME.len()) {
        Some(scheme) if scheme.eq_ignore_ascii_case(SCHEME) => {
            let rest = &trimmed[SCHEME.len()..];
            if rest.is_empty() {
                // Only the scheme was supplied; there is no token to use.
                return None;
            }
            if rest.starts_with(char::is_whitespace) {
                rest.trim_start()
            } else {
                trimmed
            }
        }
        _ => trimmed,
    };

    (!token.is_empty()).then(|| token.to_string())
}
419
420fn account_id_from_access_token(access_token: &str) -> Option<String> {
421 let payload = access_token.split('.').nth(1)?;
422 let decoded = BASE64_URL_SAFE_NO_PAD.decode(payload).ok()?;
423 let value: Value = serde_json::from_slice(&decoded).ok()?;
424 value
425 .get("https://api.openai.com/auth")?
426 .get("chatgpt_account_id")?
427 .as_str()
428 .filter(|value| !value.is_empty())
429 .map(ToOwned::to_owned)
430}
431
/// Looks up the version of the desktop app installed at `/Applications/Codex.app`.
///
/// The app's `Info.plist` is read as text and the content of the first `<string>`
/// element after the `CFBundleShortVersionString` key is returned. Yields `None` when
/// the file cannot be read or the expected markup is not found.
fn detect_codex_desktop_version() -> Option<String> {
    let plist = fs::read_to_string("/Applications/Codex.app/Contents/Info.plist").ok()?;
    let (_, after_key) = plist.split_once("<key>CFBundleShortVersionString</key>")?;
    let (_, after_open_tag) = after_key.split_once("<string>")?;
    let (version, _) = after_open_tag.split_once("</string>")?;
    Some(version.to_string())
}
440
/// Returns the directory named by the `HOME` environment variable, or `.` when it is unset.
fn home_dir() -> PathBuf {
    match env::var_os("HOME") {
        Some(home) => PathBuf::from(home),
        None => PathBuf::from("."),
    }
}
446
/// Condenses a response body for use in an error message.
///
/// Runs of whitespace (including newlines) collapse to single spaces, and the result
/// is limited to at most 300 bytes. The cut is moved back to the nearest character
/// boundary, so a body containing multi-byte UTF-8 text is shortened cleanly instead
/// of panicking in `String::truncate`.
fn clip_response_body(body: &str) -> String {
    const MAX_BYTES: usize = 300;

    let mut clipped = body.split_whitespace().collect::<Vec<_>>().join(" ");
    if clipped.len() > MAX_BYTES {
        let mut end = MAX_BYTES;
        // Index 0 is always a boundary, so this loop terminates.
        while !clipped.is_char_boundary(end) {
            end -= 1;
        }
        clipped.truncate(end);
    }
    clipped
}
452
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to an audio MIME type; unknown ones to a generic byte stream.
    #[test]
    fn content_type_is_inferred_from_extension() {
        let cases = [
            ("voice.wav", "audio/wav"),
            ("voice.webm", "audio/webm"),
            ("voice.unknown", "application/octet-stream"),
        ];
        for (path, expected) in cases {
            assert_eq!(infer_content_type(path), expected);
        }
    }

    /// The content type only influences the filename when the path's own extension is
    /// not recognized and the content type itself has a known extension.
    #[test]
    fn upload_filename_uses_content_type_when_extension_is_unknown() {
        let cases = [
            ("voice", "audio/wav", "voice.wav"),
            ("voice.bin", "audio/webm", "voice.webm"),
            ("voice.wav", "audio/webm", "voice.wav"),
            ("voice.bin", "application/octet-stream", "voice.bin"),
        ];
        for (path, content_type, expected) in cases {
            assert_eq!(upload_filename(Path::new(path), content_type), expected);
        }
    }

    /// Tokens are accepted with or without the `Bearer ` prefix; blank input is rejected.
    #[test]
    fn bearer_prefix_is_optional() {
        let cases = [
            ("Bearer abc.def", Some("abc.def")),
            ("abc.def", Some("abc.def")),
            (" ", None),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_bearer_prefix(input).as_deref(), expected);
        }
    }

    /// The account id is read from the namespaced auth claim in the JWT payload.
    #[test]
    fn account_id_can_be_read_from_chatgpt_jwt_payload() {
        let claims = r#"{"https://api.openai.com/auth":{"chatgpt_account_id":"acct_123"}}"#;
        let encoded = BASE64_URL_SAFE_NO_PAD.encode(claims);
        let token = format!("header.{encoded}.sig");

        let account_id = account_id_from_access_token(&token);

        assert_eq!(account_id.as_deref(), Some("acct_123"));
    }

    /// An enabled HTTPS proxy in `scutil --proxy` style output becomes an `http://` URL.
    #[test]
    fn macos_https_proxy_is_parsed() {
        let output = r#"
<dictionary> {
  HTTPSEnable : 1
  HTTPSProxy : 127.0.0.1
  HTTPSPort : 7892
}
"#;

        let proxy = parse_scutil_https_proxy(output);

        assert_eq!(proxy.as_deref(), Some("http://127.0.0.1:7892"));
    }
}