async_openai/types/audio/
stream.rs

1use std::pin::Pin;
2
3use futures::Stream;
4use serde::{Deserialize, Serialize};
5
6use crate::{
7    error::OpenAIError,
8    traits::EventType,
9    types::{audio::TranscriptTextUsageTokens, LogProbProperties},
10};
11
12#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
13#[serde(tag = "type", rename_all = "snake_case")]
14pub enum CreateSpeechResponseStreamEvent {
15    /// Emitted for each chunk of audio data generated during speech synthesis.
16    #[serde(rename = "speech.audio.delta")]
17    SpeechAudioDelta(SpeechAudioDeltaEvent),
18    /// Emitted when the speech synthesis is complete and all audio has been streamed.
19    #[serde(rename = "speech.audio.done")]
20    SpeechAudioDone(SpeechAudioDoneEvent),
21}
22
23#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
24pub struct SpeechAudioDeltaEvent {
25    /// A chunk of Base64-encoded audio data.
26    pub audio: String,
27}
28
29#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
30pub struct SpeechUsage {
31    /// Number of input tokens in the prompt.
32    pub input_tokens: u32,
33    /// Number of output tokens generated.
34    pub output_tokens: u32,
35    /// Total number of tokens used (input + output).
36    pub total_tokens: u32,
37}
38
39#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
40pub struct SpeechAudioDoneEvent {
41    /// Token usage statistics for the request.
42    pub usage: SpeechUsage,
43}
44
45/// Stream of response events
46pub type SpeechResponseStream =
47    Pin<Box<dyn Stream<Item = Result<CreateSpeechResponseStreamEvent, OpenAIError>> + Send>>;
48
49#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
50pub struct TranscriptionTextSegmentEvent {
51    /// Unique identifier for the segment.
52    pub id: String,
53    /// Start timestamp of the segment in seconds.
54    pub start: f32,
55    /// End timestamp of the segment in seconds.
56    pub end: f32,
57    /// Transcript text for this segment.
58    pub text: String,
59    /// Speaker label for this segment.
60    pub speaker: String,
61}
62
63#[derive(Debug, Serialize, Deserialize, Clone)]
64pub struct TranscriptionTextDeltaEvent {
65    /// The text delta that was additionally transcribed.
66    pub delta: String,
67    /// The log probabilities of the individual tokens in the transcription.
68    /// Only included if you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with
69    /// the `include[]` parameter set to `logprobs`.
70    pub logprobs: Option<Vec<LogProbProperties>>,
71    /// Identifier of the diarized segment that this delta belongs to. Only present when using
72    /// `gpt-4o-transcribe-diarize`.
73    pub segment_id: Option<String>,
74}
75
76#[derive(Debug, Serialize, Deserialize, Clone)]
77pub struct TranscriptionTextDoneEvent {
78    /// The text that was transcribed.
79    pub text: String,
80    /// The log probabilities of the individual tokens in the transcription.
81    /// Only included if you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with
82    /// the `include[]` parameter set to `logprobs`.
83    pub logprobs: Option<Vec<LogProbProperties>>,
84    /// Usage statistics for models billed by token usage.
85    pub usage: TranscriptTextUsageTokens,
86}
87
88#[derive(Debug, Serialize, Deserialize, Clone)]
89#[serde(tag = "type")]
90pub enum CreateTranscriptionResponseStreamEvent {
91    /// Emitted when a diarized transcription returns a completed segment with speaker information. Only
92    /// emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with
93    /// `stream` set to `true` and `response_format` set to `diarized_json`.
94    #[serde(rename = "transcript.text.segment")]
95    TranscriptTextSegment(TranscriptionTextSegmentEvent),
96    #[serde(rename = "transcript.text.delta")]
97    TranscriptTextDelta(TranscriptionTextDeltaEvent),
98    /// Emitted when the transcription is complete. Contains the complete transcription text. Only emitted
99    /// when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the
100    /// `Stream` parameter set to `true`.
101    #[serde(rename = "transcript.text.done")]
102    TranscriptTextDone(TranscriptionTextDoneEvent),
103}
104
105pub type TranscriptionResponseStream =
106    Pin<Box<dyn Stream<Item = Result<CreateTranscriptionResponseStreamEvent, OpenAIError>> + Send>>;
107
108impl EventType for SpeechAudioDeltaEvent {
109    fn event_type(&self) -> &'static str {
110        "speech.audio.delta"
111    }
112}
113
114impl EventType for SpeechAudioDoneEvent {
115    fn event_type(&self) -> &'static str {
116        "speech.audio.done"
117    }
118}
119
120impl EventType for CreateSpeechResponseStreamEvent {
121    fn event_type(&self) -> &'static str {
122        match self {
123            CreateSpeechResponseStreamEvent::SpeechAudioDelta(event) => event.event_type(),
124            CreateSpeechResponseStreamEvent::SpeechAudioDone(event) => event.event_type(),
125        }
126    }
127}
128
129impl EventType for TranscriptionTextSegmentEvent {
130    fn event_type(&self) -> &'static str {
131        "transcript.text.segment"
132    }
133}
134
135impl EventType for TranscriptionTextDeltaEvent {
136    fn event_type(&self) -> &'static str {
137        "transcript.text.delta"
138    }
139}
140
141impl EventType for TranscriptionTextDoneEvent {
142    fn event_type(&self) -> &'static str {
143        "transcript.text.done"
144    }
145}
146
147impl EventType for CreateTranscriptionResponseStreamEvent {
148    fn event_type(&self) -> &'static str {
149        match self {
150            CreateTranscriptionResponseStreamEvent::TranscriptTextSegment(event) => {
151                event.event_type()
152            }
153            CreateTranscriptionResponseStreamEvent::TranscriptTextDelta(event) => {
154                event.event_type()
155            }
156            CreateTranscriptionResponseStreamEvent::TranscriptTextDone(event) => event.event_type(),
157        }
158    }
159}