linger_openai_sdk/
audio.rs

1use crate::error::LingerError;
2use crate::transport::{BodyStream, HttpRequest};
3use crate::RequestId;
4use bytes::Bytes;
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7use std::collections::BTreeMap;
8
/// Request body for `POST /v1/audio/speech`.
///
/// Serialized with `serde`. Optional fields that are `None` are skipped during
/// serialization, and the entries of `extra` are flattened into the same
/// top-level object as the typed fields.
#[derive(Clone, Debug, Serialize, PartialEq)]
#[non_exhaustive]
pub struct CreateSpeechRequest {
    /// Text-to-speech model id.
    pub model: String,
    /// Text input to synthesize.
    pub input: String,
    /// Voice used for the synthesized audio.
    pub voice: String,
    /// Optional audio response format; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<String>,
    /// Optional playback speed; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speed: Option<f32>,
    /// Optional model instructions; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
    /// Optional audio stream response format; omitted from the body when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream_format: Option<String>,
    /// Forward-compatible optional fields not yet covered by handwritten types.
    ///
    /// NOTE(review): because this map is flattened, a key that matches a typed
    /// field name (for example `model`) would presumably be emitted in addition
    /// to the typed field — confirm whether that should be rejected.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
}
44
45impl CreateSpeechRequest {
46    /// EN: Starts building a speech request.
47    /// 中文：开始构建语音请求。
48    pub fn builder() -> CreateSpeechRequestBuilder {
49        CreateSpeechRequestBuilder::default()
50    }
51}
52
/// Builder for [`CreateSpeechRequest`].
///
/// Every field starts unset (`None` / empty map) through `Default`; values are
/// checked only when `build` is called.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct CreateSpeechRequestBuilder {
    // Required at build time.
    model: Option<String>,
    // Required at build time.
    input: Option<String>,
    // Required at build time.
    voice: Option<String>,
    response_format: Option<String>,
    speed: Option<f32>,
    instructions: Option<String>,
    stream_format: Option<String>,
    // Forward-compatible JSON fields copied verbatim into the request.
    extra: BTreeMap<String, Value>,
}
67
68impl CreateSpeechRequestBuilder {
69    /// EN: Sets the speech model id.
70    /// 中文：设置语音模型 ID。
71    pub fn model(mut self, model: impl Into<String>) -> Self {
72        self.model = Some(model.into());
73        self
74    }
75
76    /// EN: Sets text input to synthesize.
77    /// 中文：设置要合成的文本输入。
78    pub fn input(mut self, input: impl Into<String>) -> Self {
79        self.input = Some(input.into());
80        self
81    }
82
83    /// EN: Sets the voice.
84    /// 中文：设置声音。
85    pub fn voice(mut self, voice: impl Into<String>) -> Self {
86        self.voice = Some(voice.into());
87        self
88    }
89
90    /// EN: Sets the optional audio response format.
91    /// 中文：设置可选的音频响应格式。
92    pub fn response_format(mut self, response_format: impl Into<String>) -> Self {
93        self.response_format = Some(response_format.into());
94        self
95    }
96
97    /// EN: Sets the optional playback speed.
98    /// 中文：设置可选的播放速度。
99    pub fn speed(mut self, speed: f32) -> Self {
100        self.speed = Some(speed);
101        self
102    }
103
104    /// EN: Sets optional model instructions.
105    /// 中文：设置可选的模型指令。
106    pub fn instructions(mut self, instructions: impl Into<String>) -> Self {
107        self.instructions = Some(instructions.into());
108        self
109    }
110
111    /// EN: Sets the optional audio stream response format, such as `audio` or `sse`.
112    /// 中文：设置可选的音频流响应格式，例如 `audio` 或 `sse`。
113    pub fn stream_format(mut self, stream_format: impl Into<String>) -> Self {
114        self.stream_format = Some(stream_format.into());
115        self
116    }
117
118    /// EN: Adds a forward-compatible JSON field.
119    /// 中文：添加前向兼容的 JSON 字段。
120    pub fn extra(mut self, name: impl Into<String>, value: Value) -> Self {
121        self.extra.insert(name.into(), value);
122        self
123    }
124
125    /// EN: Builds and validates the request.
126    /// 中文：构建并校验请求。
127    pub fn build(self) -> Result<CreateSpeechRequest, LingerError> {
128        let model = required_string("model", self.model)?;
129        let input = required_string("input", self.input)?;
130        let voice = required_string("voice", self.voice)?;
131        validate_max_chars("input", &input, 4096)?;
132        validate_optional_string("response_format", self.response_format.as_deref())?;
133        validate_optional_string("instructions", self.instructions.as_deref())?;
134        if let Some(instructions) = self.instructions.as_deref() {
135            validate_max_chars("instructions", instructions, 4096)?;
136        }
137        validate_optional_string("stream_format", self.stream_format.as_deref())?;
138        if self
139            .speed
140            .is_some_and(|speed| !(0.25..=4.0).contains(&speed))
141        {
142            return Err(LingerError::invalid_config(
143                "speed must be between 0.25 and 4.0",
144            ));
145        }
146        Ok(CreateSpeechRequest {
147            model,
148            input,
149            voice,
150            response_format: self.response_format,
151            speed: self.speed,
152            instructions: self.instructions,
153            stream_format: self.stream_format,
154            extra: self.extra,
155        })
156    }
157}
158
/// Streaming speech audio response.
///
/// Holds the response metadata next to the not-yet-consumed body stream.
pub struct AudioSpeechResponse {
    // OpenAI request id, when one was supplied to the constructor.
    request_id: Option<RequestId>,
    // Response content type, when one was supplied to the constructor.
    content_type: Option<String>,
    // Audio body, handed out by `into_stream`.
    body: BodyStream,
}
166
167impl AudioSpeechResponse {
168    pub(crate) fn new(
169        request_id: Option<RequestId>,
170        content_type: Option<String>,
171        body: BodyStream,
172    ) -> Self {
173        Self {
174            request_id,
175            content_type,
176            body,
177        }
178    }
179
180    /// EN: Returns the OpenAI request id, when present.
181    /// 中文：返回 OpenAI 请求 ID，如存在。
182    pub fn request_id(&self) -> Option<&RequestId> {
183        self.request_id.as_ref()
184    }
185
186    /// EN: Returns the response content type, when present.
187    /// 中文：返回响应内容类型，如存在。
188    pub fn content_type(&self) -> Option<&str> {
189        self.content_type.as_deref()
190    }
191
192    /// EN: Consumes this response and returns the incremental audio stream.
193    /// 中文：消耗此响应并返回增量音频流。
194    pub fn into_stream(self) -> BodyStream {
195        self.body
196    }
197}
198
/// Uploadable audio file bytes and multipart metadata.
///
/// NOTE(review): `filename` and `content_type` are validated by the
/// constructor/setter but are public, so they can be reassigned afterwards
/// without re-validation — confirm this is intended.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct AudioUpload {
    /// Filename sent in the multipart part.
    pub filename: String,
    /// Content type sent for the audio part.
    pub content_type: String,
    // Raw file bytes; private, set only through `from_bytes`.
    content: Bytes,
}
212
213impl AudioUpload {
214    /// EN: Creates an upload from already available bytes without copying them.
215    /// 中文：通过已可用字节创建上传对象，不复制这些字节。
216    pub fn from_bytes(
217        filename: impl Into<String>,
218        content: impl Into<Bytes>,
219    ) -> Result<Self, LingerError> {
220        let filename = filename.into();
221        validate_header_param("filename", &filename)?;
222        Ok(Self {
223            filename,
224            content_type: "application/octet-stream".to_string(),
225            content: content.into(),
226        })
227    }
228
229    /// EN: Sets the audio part content type.
230    /// 中文：设置音频分段的内容类型。
231    pub fn content_type(mut self, content_type: impl Into<String>) -> Result<Self, LingerError> {
232        let content_type = content_type.into();
233        validate_header_value("content_type", &content_type)?;
234        self.content_type = content_type;
235        Ok(self)
236    }
237}
238
/// Request body descriptor for `POST /v1/audio/voice_consents`.
///
/// Sent as multipart: `name` and `language` become text fields and
/// `recording` becomes the `recording` file part.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct CreateVoiceConsentRequest {
    /// Label for the consent recording.
    pub name: String,
    /// BCP 47 language tag for the consent phrase.
    pub language: String,
    /// Consent recording audio file.
    pub recording: AudioUpload,
}
254
255impl CreateVoiceConsentRequest {
256    /// EN: Starts building a voice-consent request.
257    /// 中文：开始构建 voice consent 请求。
258    pub fn builder() -> CreateVoiceConsentRequestBuilder {
259        CreateVoiceConsentRequestBuilder::default()
260    }
261
262    pub(crate) fn apply_multipart_body(&self, request: &mut HttpRequest) {
263        let fields = vec![
264            ("name".to_string(), self.name.clone()),
265            ("language".to_string(), self.language.clone()),
266        ];
267        apply_audio_multipart(request, "recording", &self.recording, fields);
268    }
269}
270
/// Builder for [`CreateVoiceConsentRequest`].
///
/// All three values are required by `build`; each starts as `None`.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct CreateVoiceConsentRequestBuilder {
    name: Option<String>,
    language: Option<String>,
    recording: Option<AudioUpload>,
}
280
281impl CreateVoiceConsentRequestBuilder {
282    /// EN: Sets the consent recording label.
283    /// 中文：设置 consent 录音标签。
284    pub fn name(mut self, name: impl Into<String>) -> Self {
285        self.name = Some(name.into());
286        self
287    }
288
289    /// EN: Sets the consent phrase language.
290    /// 中文：设置 consent 短语语言。
291    pub fn language(mut self, language: impl Into<String>) -> Self {
292        self.language = Some(language.into());
293        self
294    }
295
296    /// EN: Sets the consent recording file.
297    /// 中文：设置 consent 录音文件。
298    pub fn recording(mut self, recording: AudioUpload) -> Self {
299        self.recording = Some(recording);
300        self
301    }
302
303    /// EN: Builds and validates the request.
304    /// 中文：构建并校验请求。
305    pub fn build(self) -> Result<CreateVoiceConsentRequest, LingerError> {
306        let name = required_string("name", self.name)?;
307        let language = required_string("language", self.language)?;
308        let recording = self
309            .recording
310            .ok_or_else(|| LingerError::invalid_config("recording is required"))?;
311        Ok(CreateVoiceConsentRequest {
312            name,
313            language,
314            recording,
315        })
316    }
317}
318
/// JSON request body for `POST /v1/audio/voice_consents/{consent_id}`.
///
/// Serializes to an object with the single key `name`.
#[derive(Clone, Debug, Serialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct UpdateVoiceConsentRequest {
    /// Updated label for this consent recording.
    pub name: String,
}
328
329impl UpdateVoiceConsentRequest {
330    /// EN: Starts building a voice consent update request.
331    /// 中文：开始构建 voice consent 更新请求。
332    pub fn builder() -> UpdateVoiceConsentRequestBuilder {
333        UpdateVoiceConsentRequestBuilder::default()
334    }
335}
336
/// Builder for [`UpdateVoiceConsentRequest`].
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct UpdateVoiceConsentRequestBuilder {
    // Required at build time.
    name: Option<String>,
}
344
345impl UpdateVoiceConsentRequestBuilder {
346    /// EN: Sets the updated consent recording label.
347    /// 中文：设置更新后的 consent 录音标签。
348    pub fn name(mut self, name: impl Into<String>) -> Self {
349        self.name = Some(name.into());
350        self
351    }
352
353    /// EN: Builds and validates the request.
354    /// 中文：构建并校验请求。
355    pub fn build(self) -> Result<UpdateVoiceConsentRequest, LingerError> {
356        let name = required_string("name", self.name)?;
357        Ok(UpdateVoiceConsentRequest { name })
358    }
359}
360
/// Query parameters for `GET /v1/audio/voice_consents`.
///
/// The builder enforces `limit` in `1..=100` and a non-blank `after`; values
/// assigned directly to the public fields are not re-checked here.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct AudioVoiceConsentListRequest {
    /// Maximum number of consent recordings to retrieve.
    pub limit: Option<u8>,
    /// Cursor after which the next page starts.
    pub after: Option<String>,
}
373
374impl AudioVoiceConsentListRequest {
375    /// EN: Starts building voice consent list query parameters.
376    /// 中文：开始构建 voice consent 列表查询参数。
377    pub fn builder() -> AudioVoiceConsentListRequestBuilder {
378        AudioVoiceConsentListRequestBuilder::default()
379    }
380
381    pub(crate) fn path(&self) -> String {
382        path_with_query(
383            "/v1/audio/voice_consents",
384            AudioListQuery {
385                limit: self.limit,
386                after: self.after.as_deref(),
387            },
388        )
389    }
390}
391
/// Builder for [`AudioVoiceConsentListRequest`] query parameters.
///
/// Both parameters are optional and start as `None`.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct AudioVoiceConsentListRequestBuilder {
    limit: Option<u8>,
    after: Option<String>,
}
400
401impl AudioVoiceConsentListRequestBuilder {
402    /// EN: Sets the maximum number of consent recordings to retrieve.
403    /// 中文：设置要获取的最大 consent 录音数量。
404    pub fn limit(mut self, limit: u8) -> Self {
405        self.limit = Some(limit);
406        self
407    }
408
409    /// EN: Sets the cursor after which the next page starts.
410    /// 中文：设置下一页开始位置之前的游标。
411    pub fn after(mut self, after: impl Into<String>) -> Self {
412        self.after = Some(after.into());
413        self
414    }
415
416    /// EN: Builds and validates the query parameters.
417    /// 中文：构建并校验查询参数。
418    pub fn build(self) -> Result<AudioVoiceConsentListRequest, LingerError> {
419        if let Some(limit) = self.limit {
420            if limit == 0 || limit > 100 {
421                return Err(LingerError::invalid_config(
422                    "limit must be between 1 and 100",
423                ));
424            }
425        }
426        if let Some(after) = &self.after {
427            if after.trim().is_empty() {
428                return Err(LingerError::invalid_config("after must not be empty"));
429            }
430        }
431        Ok(AudioVoiceConsentListRequest {
432            limit: self.limit,
433            after: self.after,
434        })
435    }
436}
437
/// Request body descriptor for `POST /v1/audio/voices`.
///
/// Sent as multipart: `name` and `consent` become text fields and
/// `audio_sample` becomes the `audio_sample` file part.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct CreateVoiceRequest {
    /// Name for the new custom voice.
    pub name: String,
    /// Consent recording id authorizing this voice.
    pub consent: String,
    /// Sample audio recording for the custom voice.
    pub audio_sample: AudioUpload,
}
453
454impl CreateVoiceRequest {
455    /// EN: Starts building a custom voice request.
456    /// 中文：开始构建自定义 voice 请求。
457    pub fn builder() -> CreateVoiceRequestBuilder {
458        CreateVoiceRequestBuilder::default()
459    }
460
461    pub(crate) fn apply_multipart_body(&self, request: &mut HttpRequest) {
462        let fields = vec![
463            ("name".to_string(), self.name.clone()),
464            ("consent".to_string(), self.consent.clone()),
465        ];
466        apply_audio_multipart(request, "audio_sample", &self.audio_sample, fields);
467    }
468}
469
/// Builder for [`CreateVoiceRequest`].
///
/// All three values are required by `build`; each starts as `None`.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct CreateVoiceRequestBuilder {
    name: Option<String>,
    consent: Option<String>,
    audio_sample: Option<AudioUpload>,
}
479
480impl CreateVoiceRequestBuilder {
481    /// EN: Sets the new custom voice name.
482    /// 中文：设置新自定义 voice 的名称。
483    pub fn name(mut self, name: impl Into<String>) -> Self {
484        self.name = Some(name.into());
485        self
486    }
487
488    /// EN: Sets the consent recording id.
489    /// 中文：设置 consent 录音 ID。
490    pub fn consent(mut self, consent: impl Into<String>) -> Self {
491        self.consent = Some(consent.into());
492        self
493    }
494
495    /// EN: Sets the sample audio recording.
496    /// 中文：设置示例录音。
497    pub fn audio_sample(mut self, audio_sample: AudioUpload) -> Self {
498        self.audio_sample = Some(audio_sample);
499        self
500    }
501
502    /// EN: Builds and validates the request.
503    /// 中文：构建并校验请求。
504    pub fn build(self) -> Result<CreateVoiceRequest, LingerError> {
505        let name = required_string("name", self.name)?;
506        let consent = required_string("consent", self.consent)?;
507        let audio_sample = self
508            .audio_sample
509            .ok_or_else(|| LingerError::invalid_config("audio_sample is required"))?;
510        Ok(CreateVoiceRequest {
511            name,
512            consent,
513            audio_sample,
514        })
515    }
516}
517
/// Request body descriptor for `POST /v1/audio/transcriptions`.
///
/// Sent as multipart. List-valued fields are emitted as repeated `name[]`
/// form fields, one per item.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct CreateTranscriptionRequest {
    /// Audio file to transcribe.
    pub file: AudioUpload,
    /// Transcription model id.
    pub model: String,
    /// Optional input language.
    pub language: Option<String>,
    /// Optional prompt.
    pub prompt: Option<String>,
    /// Optional response format.
    pub response_format: Option<String>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional timestamp granularities to populate; empty means none are sent.
    pub timestamp_granularities: Vec<String>,
    /// Optional chunking strategy.
    pub chunking_strategy: Option<String>,
    /// Optional known speaker names for diarized transcription.
    pub known_speaker_names: Vec<String>,
    /// Optional known speaker audio reference data URLs.
    pub known_speaker_references: Vec<String>,
    // When true, the multipart body carries `include[]=logprobs`.
    include_logprobs: bool,
}
555
556impl CreateTranscriptionRequest {
557    /// EN: Starts building a transcription request.
558    /// 中文：开始构建转写请求。
559    pub fn builder() -> CreateTranscriptionRequestBuilder {
560        CreateTranscriptionRequestBuilder::default()
561    }
562
563    pub(crate) fn apply_multipart_body(&self, request: &mut HttpRequest) {
564        let mut fields = Vec::new();
565        fields.push(("model".to_string(), self.model.clone()));
566        push_optional_field(&mut fields, "language", self.language.as_deref());
567        push_optional_field(&mut fields, "prompt", self.prompt.as_deref());
568        push_optional_field(
569            &mut fields,
570            "response_format",
571            self.response_format.as_deref(),
572        );
573        if let Some(temperature) = self.temperature {
574            fields.push(("temperature".to_string(), temperature.to_string()));
575        }
576        for granularity in &self.timestamp_granularities {
577            fields.push(("timestamp_granularities[]".to_string(), granularity.clone()));
578        }
579        push_optional_field(
580            &mut fields,
581            "chunking_strategy",
582            self.chunking_strategy.as_deref(),
583        );
584        for name in &self.known_speaker_names {
585            fields.push(("known_speaker_names[]".to_string(), name.clone()));
586        }
587        for reference in &self.known_speaker_references {
588            fields.push(("known_speaker_references[]".to_string(), reference.clone()));
589        }
590        if self.include_logprobs {
591            fields.push(("include[]".to_string(), "logprobs".to_string()));
592        }
593        apply_audio_multipart(request, "file", &self.file, fields);
594    }
595}
596
/// Builder for [`CreateTranscriptionRequest`].
///
/// `file` and `model` are required by `build`; everything else is optional and
/// starts unset through `Default`.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct CreateTranscriptionRequestBuilder {
    file: Option<AudioUpload>,
    model: Option<String>,
    language: Option<String>,
    prompt: Option<String>,
    response_format: Option<String>,
    temperature: Option<f32>,
    timestamp_granularities: Vec<String>,
    chunking_strategy: Option<String>,
    known_speaker_names: Vec<String>,
    known_speaker_references: Vec<String>,
    // Set by `include_logprobs()`; defaults to false.
    include_logprobs: bool,
}
614
615impl CreateTranscriptionRequestBuilder {
616    /// EN: Sets the audio file.
617    /// 中文：设置音频文件。
618    pub fn file(mut self, file: AudioUpload) -> Self {
619        self.file = Some(file);
620        self
621    }
622
623    /// EN: Sets the transcription model id.
624    /// 中文：设置转写模型 ID。
625    pub fn model(mut self, model: impl Into<String>) -> Self {
626        self.model = Some(model.into());
627        self
628    }
629
630    /// EN: Sets the optional language.
631    /// 中文：设置可选语言。
632    pub fn language(mut self, language: impl Into<String>) -> Self {
633        self.language = Some(language.into());
634        self
635    }
636
637    /// EN: Sets the optional prompt.
638    /// 中文：设置可选提示。
639    pub fn prompt(mut self, prompt: impl Into<String>) -> Self {
640        self.prompt = Some(prompt.into());
641        self
642    }
643
644    /// EN: Sets the optional response format.
645    /// 中文：设置可选响应格式。
646    pub fn response_format(mut self, response_format: impl Into<String>) -> Self {
647        self.response_format = Some(response_format.into());
648        self
649    }
650
651    /// EN: Sets the optional temperature.
652    /// 中文：设置可选 temperature。
653    pub fn temperature(mut self, temperature: f32) -> Self {
654        self.temperature = Some(temperature);
655        self
656    }
657
658    /// EN: Adds a timestamp granularity such as `word` or `segment`.
659    /// 中文：添加一个时间戳粒度，例如 `word` 或 `segment`。
660    pub fn timestamp_granularity(mut self, granularity: impl Into<String>) -> Self {
661        self.timestamp_granularities.push(granularity.into());
662        self
663    }
664
665    /// EN: Replaces the timestamp granularity list.
666    /// 中文：替换时间戳粒度列表。
667    pub fn timestamp_granularities(
668        mut self,
669        granularities: impl IntoIterator<Item = impl Into<String>>,
670    ) -> Self {
671        self.timestamp_granularities = granularities.into_iter().map(Into::into).collect();
672        self
673    }
674
675    /// EN: Sets the transcription chunking strategy to documented `auto`.
676    /// 中文：将转写分块策略设置为官方文档中的 `auto`。
677    pub fn chunking_strategy_auto(mut self) -> Self {
678        self.chunking_strategy = Some("auto".to_string());
679        self
680    }
681
682    /// EN: Adds a known speaker name for diarized transcription.
683    /// 中文：为说话人分离转写添加一个已知说话人名称。
684    pub fn known_speaker_name(mut self, name: impl Into<String>) -> Self {
685        self.known_speaker_names.push(name.into());
686        self
687    }
688
689    /// EN: Replaces the known speaker name list.
690    /// 中文：替换已知说话人名称列表。
691    pub fn known_speaker_names(
692        mut self,
693        names: impl IntoIterator<Item = impl Into<String>>,
694    ) -> Self {
695        self.known_speaker_names = names.into_iter().map(Into::into).collect();
696        self
697    }
698
699    /// EN: Adds a known speaker reference audio data URL.
700    /// 中文：添加一个已知说话人引用音频 data URL。
701    pub fn known_speaker_reference(mut self, reference: impl Into<String>) -> Self {
702        self.known_speaker_references.push(reference.into());
703        self
704    }
705
706    /// EN: Replaces the known speaker reference list.
707    /// 中文：替换已知说话人引用列表。
708    pub fn known_speaker_references(
709        mut self,
710        references: impl IntoIterator<Item = impl Into<String>>,
711    ) -> Self {
712        self.known_speaker_references = references.into_iter().map(Into::into).collect();
713        self
714    }
715
716    /// EN: Includes token log probabilities in the transcription response.
717    /// 中文：在转写响应中包含 token 对数概率。
718    pub fn include_logprobs(mut self) -> Self {
719        self.include_logprobs = true;
720        self
721    }
722
723    /// EN: Builds and validates the request.
724    /// 中文：构建并校验请求。
725    pub fn build(self) -> Result<CreateTranscriptionRequest, LingerError> {
726        let file = self
727            .file
728            .ok_or_else(|| LingerError::invalid_config("file is required"))?;
729        let model = required_string("model", self.model)?;
730        validate_optional_string("language", self.language.as_deref())?;
731        validate_optional_string("prompt", self.prompt.as_deref())?;
732        validate_optional_string("response_format", self.response_format.as_deref())?;
733        validate_optional_string("chunking_strategy", self.chunking_strategy.as_deref())?;
734        validate_string_items("timestamp_granularities", &self.timestamp_granularities)?;
735        validate_limited_string_items("known_speaker_names", &self.known_speaker_names, 4)?;
736        validate_limited_string_items(
737            "known_speaker_references",
738            &self.known_speaker_references,
739            4,
740        )?;
741        Ok(CreateTranscriptionRequest {
742            file,
743            model,
744            language: self.language,
745            prompt: self.prompt,
746            response_format: self.response_format,
747            temperature: self.temperature,
748            timestamp_granularities: self.timestamp_granularities,
749            chunking_strategy: self.chunking_strategy,
750            known_speaker_names: self.known_speaker_names,
751            known_speaker_references: self.known_speaker_references,
752            include_logprobs: self.include_logprobs,
753        })
754    }
755}
756
/// Request body descriptor for `POST /v1/audio/translations`.
///
/// Sent as multipart with the audio in the `file` part.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct CreateTranslationRequest {
    /// Audio file to translate.
    pub file: AudioUpload,
    /// Translation model id.
    pub model: String,
    /// Optional prompt.
    pub prompt: Option<String>,
    /// Optional response format.
    pub response_format: Option<String>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
}
778
779impl CreateTranslationRequest {
780    /// EN: Starts building a translation request.
781    /// 中文：开始构建翻译请求。
782    pub fn builder() -> CreateTranslationRequestBuilder {
783        CreateTranslationRequestBuilder::default()
784    }
785
786    pub(crate) fn apply_multipart_body(&self, request: &mut HttpRequest) {
787        let mut fields = Vec::new();
788        fields.push(("model".to_string(), self.model.clone()));
789        push_optional_field(&mut fields, "prompt", self.prompt.as_deref());
790        push_optional_field(
791            &mut fields,
792            "response_format",
793            self.response_format.as_deref(),
794        );
795        if let Some(temperature) = self.temperature {
796            fields.push(("temperature".to_string(), temperature.to_string()));
797        }
798        apply_audio_multipart(request, "file", &self.file, fields);
799    }
800}
801
/// Builder for [`CreateTranslationRequest`].
///
/// `file` and `model` are required by `build`; the rest are optional.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct CreateTranslationRequestBuilder {
    file: Option<AudioUpload>,
    model: Option<String>,
    prompt: Option<String>,
    response_format: Option<String>,
    temperature: Option<f32>,
}
813
814impl CreateTranslationRequestBuilder {
815    /// EN: Sets the audio file.
816    /// 中文：设置音频文件。
817    pub fn file(mut self, file: AudioUpload) -> Self {
818        self.file = Some(file);
819        self
820    }
821
822    /// EN: Sets the translation model id.
823    /// 中文：设置翻译模型 ID。
824    pub fn model(mut self, model: impl Into<String>) -> Self {
825        self.model = Some(model.into());
826        self
827    }
828
829    /// EN: Sets the optional prompt.
830    /// 中文：设置可选提示。
831    pub fn prompt(mut self, prompt: impl Into<String>) -> Self {
832        self.prompt = Some(prompt.into());
833        self
834    }
835
836    /// EN: Sets the optional response format.
837    /// 中文：设置可选响应格式。
838    pub fn response_format(mut self, response_format: impl Into<String>) -> Self {
839        self.response_format = Some(response_format.into());
840        self
841    }
842
843    /// EN: Sets the optional temperature.
844    /// 中文：设置可选 temperature。
845    pub fn temperature(mut self, temperature: f32) -> Self {
846        self.temperature = Some(temperature);
847        self
848    }
849
850    /// EN: Builds and validates the request.
851    /// 中文：构建并校验请求。
852    pub fn build(self) -> Result<CreateTranslationRequest, LingerError> {
853        let file = self
854            .file
855            .ok_or_else(|| LingerError::invalid_config("file is required"))?;
856        let model = required_string("model", self.model)?;
857        validate_optional_string("prompt", self.prompt.as_deref())?;
858        validate_optional_string("response_format", self.response_format.as_deref())?;
859        Ok(CreateTranslationRequest {
860            file,
861            model,
862            prompt: self.prompt,
863            response_format: self.response_format,
864            temperature: self.temperature,
865        })
866    }
867}
868
/// JSON transcription response.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[non_exhaustive]
pub struct AudioTranscription {
    /// Transcribed text.
    pub text: String,
    /// Additional fields preserved for forward compatibility; flattened, so it
    /// collects every top-level key that is not `text`.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
    /// OpenAI request id from response headers.
    ///
    /// Skipped by serde in both directions; attached afterwards through
    /// `with_request_id`.
    #[serde(skip)]
    request_id: Option<RequestId>,
}
886
887impl AudioTranscription {
888    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
889        self.request_id = request_id;
890        self
891    }
892
893    /// EN: Returns the OpenAI request id, when present.
894    /// 中文：返回 OpenAI 请求 ID，如存在。
895    pub fn request_id(&self) -> Option<&RequestId> {
896        self.request_id.as_ref()
897    }
898}
899
/// JSON translation response.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[non_exhaustive]
pub struct AudioTranslation {
    /// Translated text.
    pub text: String,
    /// Additional fields preserved for forward compatibility; flattened, so it
    /// collects every top-level key that is not `text`.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
    /// OpenAI request id from response headers.
    ///
    /// Skipped by serde in both directions; attached afterwards through
    /// `with_request_id`.
    #[serde(skip)]
    request_id: Option<RequestId>,
}
917
918impl AudioTranslation {
919    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
920        self.request_id = request_id;
921        self
922    }
923
924    /// EN: Returns the OpenAI request id, when present.
925    /// 中文：返回 OpenAI 请求 ID，如存在。
926    pub fn request_id(&self) -> Option<&RequestId> {
927        self.request_id.as_ref()
928    }
929}
930
/// Voice consent recording used to authorize custom voice creation.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct AudioVoiceConsent {
    /// API object type, normally `audio.voice_consent`.
    pub object: String,
    /// Consent recording id.
    pub id: String,
    /// Label provided when the consent recording was uploaded.
    pub name: String,
    /// BCP 47 language tag for the consent phrase.
    pub language: String,
    /// Unix timestamp for creation.
    pub created_at: u64,
    /// OpenAI request id from response headers. Skipped by serde in both
    /// directions, so it is `None` right after deserialization and is set
    /// separately through `with_request_id`.
    #[serde(skip)]
    request_id: Option<RequestId>,
}
956
957impl AudioVoiceConsent {
958    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
959        self.request_id = request_id;
960        self
961    }
962
963    /// EN: Returns the OpenAI request id, when present.
964    /// 中文：返回 OpenAI 请求 ID，如存在。
965    pub fn request_id(&self) -> Option<&RequestId> {
966        self.request_id.as_ref()
967    }
968}
969
/// Paginated voice consent list.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct AudioVoiceConsentPage {
    /// API list object type.
    pub object: String,
    /// Voice consent recordings on this page. Falls back to an empty vector
    /// when the key is missing from the payload.
    #[serde(default)]
    pub data: Vec<AudioVoiceConsent>,
    /// First voice consent id on this page; `None` when the key is missing.
    #[serde(default)]
    pub first_id: Option<String>,
    /// Last voice consent id on this page; `None` when the key is missing.
    #[serde(default)]
    pub last_id: Option<String>,
    /// Whether more voice consents are available.
    pub has_more: bool,
    /// OpenAI request id from response headers. Skipped by serde in both
    /// directions, so it is `None` right after deserialization and is set
    /// separately through `with_request_id`.
    #[serde(skip)]
    request_id: Option<RequestId>,
}
998
999impl AudioVoiceConsentPage {
1000    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
1001        self.request_id = request_id;
1002        self
1003    }
1004
1005    /// EN: Returns the OpenAI request id, when present.
1006    /// 中文：返回 OpenAI 请求 ID，如存在。
1007    pub fn request_id(&self) -> Option<&RequestId> {
1008        self.request_id.as_ref()
1009    }
1010}
1011
/// Deletion result returned for voice consent recordings.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct AudioVoiceConsentDeletion {
    /// Deleted voice consent id.
    pub id: String,
    /// API object type, normally `audio.voice_consent`.
    pub object: String,
    /// Whether the voice consent was deleted.
    pub deleted: bool,
    /// OpenAI request id from response headers. Skipped by serde in both
    /// directions, so it is `None` right after deserialization and is set
    /// separately through `with_request_id`.
    #[serde(skip)]
    request_id: Option<RequestId>,
}
1031
1032impl AudioVoiceConsentDeletion {
1033    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
1034        self.request_id = request_id;
1035        self
1036    }
1037
1038    /// EN: Returns the OpenAI request id, when present.
1039    /// 中文：返回 OpenAI 请求 ID，如存在。
1040    pub fn request_id(&self) -> Option<&RequestId> {
1041        self.request_id.as_ref()
1042    }
1043}
1044
1045/// EN: Custom voice returned by the Audio Voices API.
1046/// 中文：Audio Voices API 返回的自定义 voice。
1047#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
1048#[non_exhaustive]
1049pub struct AudioVoice {
1050    /// EN: API object type, normally `audio.voice`.
1051    /// 中文：API 对象类型，通常为 `audio.voice`。
1052    pub object: String,
1053    /// EN: Voice identifier usable in audio output endpoints.
1054    /// 中文：可用于音频输出端点的 voice 标识符。
1055    pub id: String,
1056    /// EN: Voice name.
1057    /// 中文：voice 名称。
1058    pub name: String,
1059    /// EN: Unix timestamp for creation.
1060    /// 中文：创建时间的 Unix 时间戳。
1061    pub created_at: u64,
1062    /// EN: OpenAI request id from response headers.
1063    /// 中文：响应头中的 OpenAI 请求 ID。
1064    #[serde(skip)]
1065    request_id: Option<RequestId>,
1066}
1067
1068impl AudioVoice {
1069    pub(crate) fn with_request_id(mut self, request_id: Option<RequestId>) -> Self {
1070        self.request_id = request_id;
1071        self
1072    }
1073
1074    /// EN: Returns the OpenAI request id, when present.
1075    /// 中文：返回 OpenAI 请求 ID，如存在。
1076    pub fn request_id(&self) -> Option<&RequestId> {
1077        self.request_id.as_ref()
1078    }
1079}
1080
1081fn apply_audio_multipart(
1082    request: &mut HttpRequest,
1083    file_field_name: &str,
1084    file: &AudioUpload,
1085    fields: Vec<(String, String)>,
1086) {
1087    let boundary = multipart_boundary(&file.content);
1088    request.insert_header(
1089        "content-type",
1090        format!("multipart/form-data; boundary={boundary}"),
1091    );
1092    let mut chunks = Vec::new();
1093    for (name, value) in fields {
1094        chunks.push(Ok(Bytes::from(format!(
1095            "--{boundary}\r\nContent-Disposition: form-data; name=\"{name}\"\r\n\r\n{value}\r\n"
1096        ))));
1097    }
1098    chunks.push(Ok(Bytes::from(format!(
1099        "--{boundary}\r\nContent-Disposition: form-data; name=\"{}\"; filename=\"{}\"\r\nContent-Type: {}\r\n\r\n",
1100        escape_multipart_param(file_field_name),
1101        escape_multipart_param(&file.filename),
1102        file.content_type
1103    ))));
1104    chunks.push(Ok(file.content.clone()));
1105    chunks.push(Ok(Bytes::from(format!("\r\n--{boundary}--\r\n"))));
1106    request.set_body_stream(futures_util::stream::iter(chunks));
1107}
1108
/// Appends `(name, value)` to `fields` when `value` is `Some`; leaves
/// `fields` untouched otherwise. Both strings are copied into owned values.
fn push_optional_field(fields: &mut Vec<(String, String)>, name: &str, value: Option<&str>) {
    fields.extend(value.map(|text| (name.to_owned(), text.to_owned())));
}
1114
1115fn multipart_boundary(content: &Bytes) -> String {
1116    for counter in 0.. {
1117        let boundary = format!("linger-openai-sdk-audio-boundary-{counter}");
1118        if !contains_bytes(content, boundary.as_bytes()) {
1119            return boundary;
1120        }
1121    }
1122    unreachable!("unbounded boundary counter")
1123}
1124
/// Reports whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle` always matches, including against an empty `haystack`.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    let width = needle.len();
    if width > haystack.len() {
        // A needle longer than the haystack can never fit.
        return false;
    }
    let last_start = haystack.len() - width;
    (0..=last_start).any(|start| haystack[start..].starts_with(needle))
}
1133
1134fn required_string(name: &str, value: Option<String>) -> Result<String, LingerError> {
1135    value
1136        .filter(|value| !value.trim().is_empty())
1137        .ok_or_else(|| LingerError::invalid_config(format!("{name} is required")))
1138}
1139
1140fn validate_optional_string(name: &str, value: Option<&str>) -> Result<(), LingerError> {
1141    if value.is_some_and(|value| value.trim().is_empty()) {
1142        return Err(LingerError::invalid_config(format!(
1143            "{name} must not be empty"
1144        )));
1145    }
1146    Ok(())
1147}
1148
1149fn validate_max_chars(name: &str, value: &str, max_chars: usize) -> Result<(), LingerError> {
1150    if value.chars().count() > max_chars {
1151        return Err(LingerError::invalid_config(format!(
1152            "{name} must be at most {max_chars} characters"
1153        )));
1154    }
1155    Ok(())
1156}
1157
1158fn validate_string_items(name: &str, values: &[String]) -> Result<(), LingerError> {
1159    if values.iter().any(|value| value.trim().is_empty()) {
1160        return Err(LingerError::invalid_config(format!(
1161            "{name} must not contain empty values"
1162        )));
1163    }
1164    Ok(())
1165}
1166
1167fn validate_limited_string_items(
1168    name: &str,
1169    values: &[String],
1170    max: usize,
1171) -> Result<(), LingerError> {
1172    validate_string_items(name, values)?;
1173    if values.len() > max {
1174        return Err(LingerError::invalid_config(format!(
1175            "{name} must contain at most {max} values"
1176        )));
1177    }
1178    Ok(())
1179}
1180
1181fn validate_header_param(name: &str, value: &str) -> Result<(), LingerError> {
1182    if value.trim().is_empty() {
1183        return Err(LingerError::invalid_config(format!("{name} is required")));
1184    }
1185    validate_header_value(name, value)
1186}
1187
1188fn validate_header_value(name: &str, value: &str) -> Result<(), LingerError> {
1189    if value.contains('\r') || value.contains('\n') {
1190        return Err(LingerError::invalid_config(format!(
1191            "{name} must not contain CR or LF"
1192        )));
1193    }
1194    Ok(())
1195}
1196
/// Escapes a value for use inside a double-quoted multipart parameter.
///
/// Each backslash becomes `\\` and each double quote becomes `\"`; all other
/// characters are copied through unchanged.
fn escape_multipart_param(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1200
/// Optional query parameters that `path_with_query` appends to a list path.
struct AudioListQuery<'a> {
    /// Value for the `limit` query parameter; left out of the path when `None`.
    limit: Option<u8>,
    /// Value for the `after` query parameter; left out of the path when
    /// `None`. Presumably a pagination cursor (an id from a previous page) —
    /// confirm against callers.
    after: Option<&'a str>,
}
1205
1206fn path_with_query(base: &str, params: AudioListQuery<'_>) -> String {
1207    let mut query = Vec::new();
1208    if let Some(limit) = params.limit {
1209        query.push(format!("limit={limit}"));
1210    }
1211    if let Some(after) = params.after {
1212        query.push(format!("after={}", encode_query_value(after)));
1213    }
1214    if query.is_empty() {
1215        base.to_string()
1216    } else {
1217        format!("{base}?{}", query.join("&"))
1218    }
1219}
1220
/// Percent-encodes `value` for use as a URL query value.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` are copied through; every
/// other byte of the UTF-8 encoding is written as `%XX` with uppercase hex
/// digits.
fn encode_query_value(value: &str) -> String {
    value
        .bytes()
        .fold(String::with_capacity(value.len()), |mut encoded, byte| {
            if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') {
                encoded.push(char::from(byte));
            } else {
                encoded.push_str(&format!("%{byte:02X}"));
            }
            encoded
        })
}
linger_openai_sdk/audio.rs

linger_openai_sdk/
audio.rs