1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::process::Command;
5use std::time::Duration;
6
7use base64::prelude::*;
8use reqwest::blocking::{multipart, Client};
9use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT};
10use serde::Deserialize;
11use serde_json::Value;
12use thiserror::Error;
13
/// Endpoint that [`CodexAsrClientBuilder::new`] targets unless overridden.
pub const DEFAULT_ENDPOINT: &str = "https://chatgpt.com/backend-api/transcribe";
/// Default value of the `originator` request header; also the product name in the default
/// `User-Agent` string.
pub const DEFAULT_ORIGINATOR: &str = "Codex Desktop";
/// Version placed in the default `User-Agent` when no installed desktop app version is detected.
const DEFAULT_DESKTOP_VERSION: &str = "26.429.30905";
17
/// Errors produced while loading credentials, building the HTTP client, or transcribing audio.
#[derive(Debug, Error)]
pub enum CodexAsrError {
    /// A file (auth file or audio file) could not be read from disk.
    #[error("failed to read {path}: {source}")]
    ReadFile {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// JSON could not be deserialized. Used for the auth file and, with the placeholder path
    /// `<transcribe response>`, for the transcription response body.
    #[error("failed to parse {path}: {source}")]
    ParseAuth {
        path: PathBuf,
        #[source]
        source: serde_json::Error,
    },
    /// The auth file parsed, but holds no non-blank access token.
    #[error("Codex auth at {path} does not contain a ChatGPT access token")]
    MissingAccessToken { path: PathBuf },
    /// The auth file declares an auth mode other than `chatgpt` / `chatgpt_auth_tokens`.
    #[error("Codex auth mode is {mode}, not ChatGPT token auth")]
    UnsupportedAuthMode { mode: String },
    /// The supplied bearer token was empty after stripping the prefix and whitespace.
    #[error("invalid bearer token")]
    InvalidBearer,
    /// A request header value contained characters that are not valid in an HTTP header.
    #[error("invalid header value: {0}")]
    InvalidHeader(#[from] reqwest::header::InvalidHeaderValue),
    /// The HTTP client (or its proxy configuration) could not be constructed.
    #[error("failed to build HTTP client: {0}")]
    BuildClient(#[source] reqwest::Error),
    /// The multipart upload part could not be built (for example, an invalid MIME type).
    #[error("failed to build multipart request: {0}")]
    BuildMultipart(#[source] reqwest::Error),
    /// Sending the request or reading the response body failed.
    #[error("transcribe request failed: {0}")]
    Request(#[source] reqwest::Error),
    /// The server answered with a non-success status; `body` is a clipped copy of the response.
    #[error("transcribe request failed with HTTP {status}: {body}")]
    Http { status: u16, body: String },
    /// The response parsed as JSON but had no `text` field.
    #[error("transcribe response did not contain text")]
    MissingText,
}
51
/// Result alias whose error type is [`CodexAsrError`].
pub type Result<T> = std::result::Result<T, CodexAsrError>;
53
54#[derive(Debug, Clone)]
55pub struct CodexAuth {
56 pub access_token: String,
57 pub account_id: Option<String>,
58 pub path: Option<PathBuf>,
59}
60
61impl CodexAuth {
62 pub fn from_bearer(token: impl AsRef<str>, account_id: Option<String>) -> Result<Self> {
63 let access_token =
64 strip_bearer_prefix(token.as_ref()).ok_or(CodexAsrError::InvalidBearer)?;
65 let account_id = account_id.or_else(|| account_id_from_access_token(&access_token));
66 Ok(Self {
67 access_token,
68 account_id,
69 path: None,
70 })
71 }
72
73 pub fn from_codex_home() -> Result<Self> {
74 Self::from_auth_file(default_auth_file())
75 }
76
77 pub fn from_auth_file(path: impl AsRef<Path>) -> Result<Self> {
78 let path = path.as_ref().to_path_buf();
79 let raw = fs::read_to_string(&path).map_err(|source| CodexAsrError::ReadFile {
80 path: path.clone(),
81 source,
82 })?;
83 let parsed: AuthFile =
84 serde_json::from_str(&raw).map_err(|source| CodexAsrError::ParseAuth {
85 path: path.clone(),
86 source,
87 })?;
88 let mode = parsed.auth_mode.or(parsed.auth_mode_camel);
89 if let Some(mode) = mode {
90 if mode != "chatgpt" && mode != "chatgpt_auth_tokens" {
91 return Err(CodexAsrError::UnsupportedAuthMode { mode });
92 }
93 }
94 let tokens = parsed.tokens;
95 let token = tokens
96 .as_ref()
97 .and_then(|tokens| tokens.access_token.clone())
98 .filter(|token| !token.trim().is_empty())
99 .ok_or_else(|| CodexAsrError::MissingAccessToken { path: path.clone() })?;
100 let account_id = tokens
101 .and_then(|tokens| tokens.account_id)
102 .or_else(|| account_id_from_access_token(&token));
103 Ok(Self {
104 access_token: token,
105 account_id,
106 path: Some(path),
107 })
108 }
109}
110
111#[derive(Debug, Clone)]
112pub struct CodexAsrClient {
113 endpoint: String,
114 auth: CodexAuth,
115 http: Client,
116 originator: String,
117 user_agent: String,
118}
119
120impl CodexAsrClient {
121 pub fn builder(auth: CodexAuth) -> CodexAsrClientBuilder {
122 CodexAsrClientBuilder::new(auth)
123 }
124
125 pub fn from_codex_home() -> Result<Self> {
126 Self::builder(CodexAuth::from_codex_home()?).build()
127 }
128
129 pub fn transcribe_file(
130 &self,
131 path: impl AsRef<Path>,
132 options: TranscribeOptions,
133 ) -> Result<Transcription> {
134 let path = path.as_ref();
135 let audio = fs::read(path).map_err(|source| CodexAsrError::ReadFile {
136 path: path.to_path_buf(),
137 source,
138 })?;
139 let content_type = options
140 .content_type
141 .unwrap_or_else(|| infer_content_type(path).to_string());
142 let filename = options
143 .filename
144 .unwrap_or_else(|| upload_filename(path, &content_type));
145 self.transcribe_bytes(audio, &filename, &content_type, options.language)
146 }
147
148 pub fn transcribe_bytes(
149 &self,
150 audio: Vec<u8>,
151 filename: &str,
152 content_type: &str,
153 language: Option<String>,
154 ) -> Result<Transcription> {
155 let part = multipart::Part::bytes(audio)
156 .file_name(filename.replace('"', ""))
157 .mime_str(content_type)
158 .map_err(CodexAsrError::BuildMultipart)?;
159 let mut form = multipart::Form::new().part("file", part);
160 if let Some(language) = language {
161 form = form.text("language", language);
162 }
163
164 let response = self
165 .http
166 .post(&self.endpoint)
167 .headers(self.auth_headers()?)
168 .multipart(form)
169 .send()
170 .map_err(CodexAsrError::Request)?;
171 let status = response.status();
172 let body = response.text().map_err(CodexAsrError::Request)?;
173 if !status.is_success() {
174 return Err(CodexAsrError::Http {
175 status: status.as_u16(),
176 body: clip_response_body(&body),
177 });
178 }
179 let parsed: TranscribeResponse =
180 serde_json::from_str(&body).map_err(|source| CodexAsrError::ParseAuth {
181 path: PathBuf::from("<transcribe response>"),
182 source,
183 })?;
184 let text = parsed.text.ok_or(CodexAsrError::MissingText)?;
185 Ok(Transcription { text })
186 }
187
188 fn auth_headers(&self) -> Result<HeaderMap> {
189 let mut headers = HeaderMap::new();
190 headers.insert(
191 AUTHORIZATION,
192 HeaderValue::from_str(&format!("Bearer {}", self.auth.access_token))?,
193 );
194 headers.insert("originator", HeaderValue::from_str(&self.originator)?);
195 headers.insert(USER_AGENT, HeaderValue::from_str(&self.user_agent)?);
196 if let Some(account_id) = &self.auth.account_id {
197 headers.insert("ChatGPT-Account-Id", HeaderValue::from_str(account_id)?);
198 }
199 Ok(headers)
200 }
201}
202
203#[derive(Debug, Clone)]
204pub struct CodexAsrClientBuilder {
205 auth: CodexAuth,
206 endpoint: String,
207 proxy: Option<String>,
208 timeout: Option<Duration>,
209 connect_timeout: Option<Duration>,
210 originator: String,
211 user_agent: String,
212}
213
214impl CodexAsrClientBuilder {
215 pub fn new(auth: CodexAuth) -> Self {
216 let version =
217 detect_codex_desktop_version().unwrap_or_else(|| DEFAULT_DESKTOP_VERSION.to_string());
218 Self {
219 auth,
220 endpoint: DEFAULT_ENDPOINT.to_string(),
221 proxy: resolve_proxy(None),
222 timeout: None,
223 connect_timeout: None,
224 originator: DEFAULT_ORIGINATOR.to_string(),
225 user_agent: format!(
226 "{DEFAULT_ORIGINATOR}/{version} ({}; {})",
227 env::consts::OS,
228 env::consts::ARCH
229 ),
230 }
231 }
232
233 pub fn endpoint(mut self, endpoint: impl Into<String>) -> Self {
234 self.endpoint = endpoint.into();
235 self
236 }
237
238 pub fn proxy(mut self, proxy: Option<String>) -> Self {
239 self.proxy = proxy;
240 self
241 }
242
243 pub fn timeout(mut self, timeout: Option<Duration>) -> Self {
244 self.timeout = timeout;
245 self
246 }
247
248 pub fn connect_timeout(mut self, timeout: Option<Duration>) -> Self {
249 self.connect_timeout = timeout;
250 self
251 }
252
253 pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
254 self.user_agent = user_agent.into();
255 self
256 }
257
258 pub fn build(self) -> Result<CodexAsrClient> {
259 let mut builder = Client::builder();
260 if let Some(proxy) = self.proxy {
261 builder =
262 builder.proxy(reqwest::Proxy::https(&proxy).map_err(CodexAsrError::BuildClient)?);
263 }
264 if let Some(timeout) = self.timeout {
265 builder = builder.timeout(timeout);
266 }
267 if let Some(timeout) = self.connect_timeout {
268 builder = builder.connect_timeout(timeout);
269 }
270 let http = builder.build().map_err(CodexAsrError::BuildClient)?;
271 Ok(CodexAsrClient {
272 endpoint: self.endpoint,
273 auth: self.auth,
274 http,
275 originator: self.originator,
276 user_agent: self.user_agent,
277 })
278 }
279}
280
/// Optional per-request overrides for [`CodexAsrClient::transcribe_file`].
#[derive(Debug, Clone, Default)]
pub struct TranscribeOptions {
    /// Sent as the `language` form field when set.
    pub language: Option<String>,
    /// MIME type of the upload; inferred from the file extension when `None`.
    pub content_type: Option<String>,
    /// Filename reported for the upload; derived from the path when `None`.
    pub filename: Option<String>,
}
287
/// Result of a successful transcription request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Transcription {
    /// The `text` value returned by the server.
    pub text: String,
}
292
/// Subset of the JSON auth file that this crate reads.
#[derive(Debug, Deserialize)]
struct AuthFile {
    /// Auth mode under the snake_case key `auth_mode`; takes precedence when both keys exist.
    auth_mode: Option<String>,
    /// Auth mode under the camelCase key `authMode`.
    #[serde(rename = "authMode")]
    auth_mode_camel: Option<String>,
    /// Token bundle, if present.
    tokens: Option<AuthTokens>,
}
300
/// The `tokens` object of the auth file.
#[derive(Debug, Deserialize)]
struct AuthTokens {
    /// Access token used as the bearer credential.
    access_token: Option<String>,
    /// Account id; when absent, the id is looked up in the access token payload instead.
    account_id: Option<String>,
}
306
/// JSON body of a successful transcription response.
#[derive(Debug, Deserialize)]
struct TranscribeResponse {
    /// Transcribed text; a missing value is reported as `CodexAsrError::MissingText`.
    text: Option<String>,
}
311
/// Maps a path's extension (case-insensitively) to an audio MIME type.
///
/// Paths without an extension, with a non-UTF-8 extension, or with an unrecognised one yield
/// `application/octet-stream`.
pub fn infer_content_type(path: impl AsRef<Path>) -> &'static str {
    let extension = path
        .as_ref()
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        Some("wav" | "wave") => "audio/wav",
        Some("webm") => "audio/webm",
        Some("mp3") => "audio/mpeg",
        Some("m4a" | "mp4") => "audio/mp4",
        Some("ogg" | "oga") => "audio/ogg",
        Some("flac") => "audio/flac",
        _ => "application/octet-stream",
    }
}
330
331fn upload_filename(path: &Path, content_type: &str) -> String {
332 let original = path
333 .file_name()
334 .and_then(|name| name.to_str())
335 .unwrap_or("codex")
336 .to_string();
337 if infer_content_type(path) != "application/octet-stream" {
338 return original;
339 }
340 let Some(extension) = extension_for_content_type(content_type) else {
341 return original;
342 };
343 let stem = path
344 .file_stem()
345 .and_then(|stem| stem.to_str())
346 .filter(|stem| !stem.is_empty() && !stem.starts_with('.'))
347 .unwrap_or("codex");
348 format!("{stem}.{extension}")
349}
350
/// Returns the file extension conventionally used for an audio MIME type.
///
/// Parameters after `;` are ignored and matching is case-insensitive. Unknown types yield
/// `None`.
fn extension_for_content_type(content_type: &str) -> Option<&'static str> {
    // Only the media type itself matters; drop parameters such as `; codecs=opus`.
    let media_type = content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_ascii_lowercase();

    let extension = match media_type.as_str() {
        "audio/wav" | "audio/x-wav" | "audio/wave" => "wav",
        "audio/mpeg" | "audio/mp3" => "mp3",
        "audio/mp4" | "audio/m4a" | "audio/x-m4a" => "m4a",
        "audio/flac" | "audio/x-flac" => "flac",
        "audio/ogg" => "ogg",
        "audio/webm" => "webm",
        _ => return None,
    };
    Some(extension)
}
369
370pub fn default_auth_file() -> PathBuf {
371 let codex_home = env::var_os("CODEX_HOME")
372 .map(PathBuf::from)
373 .unwrap_or_else(|| home_dir().join(".codex"));
374 codex_home.join("auth.json")
375}
376
377pub fn resolve_proxy(explicit_proxy: Option<&str>) -> Option<String> {
378 first_non_empty([
379 explicit_proxy.map(str::to_string),
380 env::var("CODEX_ASR_PROXY").ok(),
381 env::var("CODEX_VOICE_PROXY").ok(),
382 env::var("HTTPS_PROXY").ok(),
383 env::var("https_proxy").ok(),
384 env::var("ALL_PROXY").ok(),
385 env::var("all_proxy").ok(),
386 system_https_proxy(),
387 ])
388}
389
/// Returns the first present value that is non-empty after trimming, in trimmed form.
///
/// The input is consumed lazily: iteration stops at the first match.
fn first_non_empty(values: impl IntoIterator<Item = Option<String>>) -> Option<String> {
    for candidate in values {
        let Some(raw) = candidate else {
            continue;
        };
        let trimmed = raw.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    None
}
397
398fn system_https_proxy() -> Option<String> {
399 if cfg!(target_os = "macos") {
400 let output = Command::new("scutil").arg("--proxy").output().ok()?;
401 if !output.status.success() {
402 return None;
403 }
404 return parse_scutil_https_proxy(&String::from_utf8_lossy(&output.stdout));
405 }
406 None
407}
408
/// Extracts the HTTPS proxy from `scutil --proxy` style output as an `http://host:port` URL.
///
/// Reads the `HTTPSEnable`, `HTTPSProxy` and `HTTPSPort` keys from `key : value` lines.
/// Returns `None` unless the proxy is enabled (`HTTPSEnable : 1`), the host is non-empty and
/// the port is a valid 16-bit number. Hosts containing `:` (IPv6 literals) are wrapped in
/// brackets so the resulting URL is well-formed.
fn parse_scutil_https_proxy(output: &str) -> Option<String> {
    let mut enabled = false;
    let mut host = None;
    let mut port = None;
    for line in output.lines() {
        // Split on the first colon only, so values that themselves contain colons survive.
        let Some((key, value)) = line.split_once(':') else {
            continue;
        };
        let value = value.trim();
        match key.trim() {
            "HTTPSEnable" => enabled = value == "1",
            "HTTPSProxy" => host = Some(value),
            "HTTPSPort" => port = Some(value),
            _ => {}
        }
    }
    if !enabled {
        return None;
    }

    let host = host.filter(|host| !host.is_empty())?;
    let port: u16 = port?.parse().ok()?;
    if host.contains(':') && !host.starts_with('[') {
        Some(format!("http://[{host}]:{port}"))
    } else {
        Some(format!("http://{host}:{port}"))
    }
}
430
/// Returns the bare token from `token`, which may or may not carry a `Bearer` scheme prefix.
///
/// The scheme is matched case-insensitively and may be separated from the token by any
/// whitespace. Returns `None` when nothing but whitespace (or a lone `Bearer`) is supplied.
fn strip_bearer_prefix(token: &str) -> Option<String> {
    let trimmed = token.trim();
    let (first_word, remainder) = match trimmed.split_once(char::is_whitespace) {
        Some((first_word, remainder)) => (first_word, remainder.trim()),
        None => (trimmed, ""),
    };
    // Decide on the first word rather than a literal `"Bearer "` prefix: after trimming,
    // an input such as `"Bearer "` is just `"Bearer"` and must not be taken for a token.
    let token = if first_word.eq_ignore_ascii_case("bearer") {
        remainder
    } else {
        trimmed
    };
    (!token.is_empty()).then(|| token.to_string())
}
440
441fn account_id_from_access_token(access_token: &str) -> Option<String> {
442 let payload = access_token.split('.').nth(1)?;
443 let decoded = BASE64_URL_SAFE_NO_PAD.decode(payload).ok()?;
444 let value: Value = serde_json::from_slice(&decoded).ok()?;
445 value
446 .get("https://api.openai.com/auth")?
447 .get("chatgpt_account_id")?
448 .as_str()
449 .filter(|value| !value.is_empty())
450 .map(ToOwned::to_owned)
451}
452
/// Reads the short version string of an installed Codex desktop app from its `Info.plist`.
///
/// Returns the text of the first `<string>` element following the
/// `CFBundleShortVersionString` key, or `None` when the file is unreadable or lacks it.
fn detect_codex_desktop_version() -> Option<String> {
    const INFO_PLIST: &str = "/Applications/Codex.app/Contents/Info.plist";
    const VERSION_KEY: &str = "<key>CFBundleShortVersionString</key>";
    const OPEN_TAG: &str = "<string>";
    const CLOSE_TAG: &str = "</string>";

    let contents = fs::read_to_string(INFO_PLIST).ok()?;
    let (_, after_key) = contents.split_once(VERSION_KEY)?;
    let (_, after_open) = after_key.split_once(OPEN_TAG)?;
    let (version, _) = after_open.split_once(CLOSE_TAG)?;
    Some(version.to_string())
}
461
/// Returns the directory named by the `HOME` environment variable, or `.` when it is unset.
fn home_dir() -> PathBuf {
    match env::var_os("HOME") {
        Some(home) => PathBuf::from(home),
        None => PathBuf::from("."),
    }
}
467
/// Collapses all whitespace runs in `body` to single spaces and clips the result to at most
/// 300 bytes, for inclusion in error messages.
///
/// The cut is moved back to the nearest character boundary, so multi-byte text is never split
/// (and `String::truncate` cannot panic).
fn clip_response_body(body: &str) -> String {
    const MAX_BYTES: usize = 300;

    let mut clipped = body.split_whitespace().collect::<Vec<_>>().join(" ");
    if clipped.len() > MAX_BYTES {
        let mut cut = MAX_BYTES;
        // Index 0 is always a boundary, so this loop terminates.
        while !clipped.is_char_boundary(cut) {
            cut -= 1;
        }
        clipped.truncate(cut);
    }
    clipped
}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their audio MIME type; anything else is a generic byte stream.
    #[test]
    fn content_type_is_inferred_from_extension() {
        let cases = [
            ("voice.wav", "audio/wav"),
            ("voice.webm", "audio/webm"),
            ("voice.unknown", "application/octet-stream"),
        ];
        for (file, expected) in cases {
            assert_eq!(infer_content_type(file), expected);
        }
    }

    /// The content type only influences the upload name when the path's extension is unknown.
    #[test]
    fn upload_filename_uses_content_type_when_extension_is_unknown() {
        let cases = [
            ("voice", "audio/wav", "voice.wav"),
            ("voice.bin", "audio/webm", "voice.webm"),
            ("voice.wav", "audio/webm", "voice.wav"),
            ("voice.bin", "application/octet-stream", "voice.bin"),
        ];
        for (file, content_type, expected) in cases {
            assert_eq!(upload_filename(Path::new(file), content_type), expected);
        }
    }

    /// Tokens are accepted with or without the `Bearer ` prefix; blank input is rejected.
    #[test]
    fn bearer_prefix_is_optional() {
        let cases = [
            ("Bearer abc.def", Some("abc.def")),
            ("abc.def", Some("abc.def")),
            (" ", None),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_bearer_prefix(input).as_deref(), expected);
        }
    }

    /// The account id is read from the JWT payload (second token segment).
    #[test]
    fn account_id_can_be_read_from_chatgpt_jwt_payload() {
        let claims = r#"{"https://api.openai.com/auth":{"chatgpt_account_id":"acct_123"}}"#;
        let token = format!("header.{}.sig", BASE64_URL_SAFE_NO_PAD.encode(claims));
        let account_id = account_id_from_access_token(&token);
        assert_eq!(account_id.as_deref(), Some("acct_123"));
    }

    /// An enabled HTTPS proxy in `scutil --proxy` output becomes an `http://host:port` URL.
    #[test]
    fn macos_https_proxy_is_parsed() {
        let output = r#"
<dictionary> {
  HTTPSEnable : 1
  HTTPSProxy : 127.0.0.1
  HTTPSPort : 7892
}
"#;
        let proxy = parse_scutil_https_proxy(output);
        assert_eq!(proxy.as_deref(), Some("http://127.0.0.1:7892"));
    }
}