1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
/// Configuration message for SODA: audio format, buffering, language pack
/// location, recognition mode and which optional events to produce.
///
/// Every field is declared `optional`; where a default is mentioned below it
/// is the one declared in that field's `#[prost(...)]` attribute.
///
/// Next ID to use: 12
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SerializedSodaConfigMsg {
    /// Number of channels in RAW audio that will be provided to SODA.
    #[prost(int32, optional, tag="1")]
    pub channel_count: ::core::option::Option<i32>,
    /// Sample rate, in Hz.
    #[prost(int32, optional, tag="2")]
    pub sample_rate: ::core::option::Option<i32>,
    /// Maximum size of buffer to use in PipeStream. Defaults to 0, which means
    /// unlimited.
    #[prost(int32, optional, tag="4", default="0")]
    pub max_buffer_bytes: ::core::option::Option<i32>,
    /// If set to true, forces the audio provider to simulate realtime audio
    /// provision. This only makes sense during testing, to simulate realtime audio
    /// providing from a big chunk of audio.
    /// This slows down audio provided to SODA to a maximum of real-time, which
    /// means more accurate endpointer behavior, but is unsuitable for execution in
    /// real production environments. Set with caution!
    ///
    /// Declared default: `false`.
    #[prost(bool, optional, tag="5", default="false")]
    pub simulate_realtime_testonly: ::core::option::Option<bool>,
    /// Config file location for the language pack.
    ///
    /// Deprecated. NOTE(review): presumably superseded by
    /// `language_pack_directory` — confirm against the .proto definition.
    #[deprecated]
    #[prost(string, optional, tag="3")]
    pub config_file_location: ::core::option::Option<::prost::alloc::string::String>,
    /// API key used for call verification.
    #[prost(string, optional, tag="6")]
    pub api_key: ::core::option::Option<::prost::alloc::string::String>,
    /// Directory of the language pack to use.
    #[prost(string, optional, tag="7")]
    pub language_pack_directory: ::core::option::Option<::prost::alloc::string::String>,
    /// What kind of recognition to execute here. Impacts model usage.
    ///
    /// Stored as the raw `i32` value of
    /// `serialized_soda_config_msg::RecognitionMode`; declared default: `Ime`.
    #[prost(enumeration="serialized_soda_config_msg::RecognitionMode", optional, tag="8", default="Ime")]
    pub recognition_mode: ::core::option::Option<i32>,
    /// Whether terse_processor should force a new session after every final
    /// recognition result.
    /// This will cause the terse processor to stop processing new audio once an
    /// endpoint event is detected and wait for it to generate a final event using
    /// audio up to the endpoint. This will cause processing bursts when a new
    /// session starts.
    ///
    /// Declared default: `true`.
    #[prost(bool, optional, tag="9", default="true")]
    pub reset_on_final_result: ::core::option::Option<bool>,
    /// Whether to populate the timing_metrics field on Recognition and Endpoint
    /// events.
    ///
    /// Declared default: `true`.
    #[prost(bool, optional, tag="10", default="true")]
    pub include_timing_metrics: ::core::option::Option<bool>,
    /// Whether or not to request lang id events.
    ///
    /// Declared default: `false`.
    #[prost(bool, optional, tag="11", default="false")]
    pub enable_lang_id: ::core::option::Option<bool>,
}
/// Nested message and enum types in `SerializedSodaConfigMsg`.
pub mod serialized_soda_config_msg {
    /// Recognition mode referenced by the `recognition_mode` field of
    /// `SerializedSodaConfigMsg`, where it is stored as a raw `i32`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum RecognitionMode {
        /// Zero value of the enum; no specific mode is identified.
        Unknown = 0,
        /// Intended for voice input for keyboard usage.
        Ime = 1,
        /// Intended to caption a stream of audio.
        Caption = 2,
    }
}
/// Timing information attached to recognition and endpoint events.
///
/// All values are 64-bit integers in microseconds (`_usec` suffix); every
/// field is optional.
///
/// Next id: 5
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TimingMetrics {
    /// Epoch time of first audio buffer of main query that is fed into ASR.
    /// This is the wall time read from the system clock when the first audio
    /// buffer is received by the terse processor.
    #[prost(int64, optional, tag="1")]
    pub audio_start_epoch_usec: ::core::option::Option<i64>,
    /// Start time in audio time from start of SODA session.
    /// This time measures the amount of audio input into SODA.
    #[prost(int64, optional, tag="2")]
    pub audio_start_time_usec: ::core::option::Option<i64>,
    /// Elapsed wall time usec since first frame.
    #[prost(int64, optional, tag="3")]
    pub elapsed_wall_time_usec: ::core::option::Option<i64>,
    /// Elapsed processed audio usec from first frame after preamble.
    #[prost(int64, optional, tag="4")]
    pub event_end_time_usec: ::core::option::Option<i64>,
}
/// Recognition result produced by SODA: the hypotheses, what kind of result
/// this is, why it was finalized (if final), and optional timing data.
///
/// Next id: 5
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SodaRecognitionResult {
    /// Hypothesis from recognition, in order of probability. We don't get the
    /// probability from SODA, so the only given is that the first is the "best".
    #[prost(string, repeated, tag="1")]
    pub hypothesis: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
    /// What kind of result set this is.
    ///
    /// Stored as the raw `i32` value of `soda_recognition_result::ResultType`.
    #[prost(enumeration="soda_recognition_result::ResultType", optional, tag="2")]
    pub result_type: ::core::option::Option<i32>,
    /// If this is a final result, why was the recognition marked final.
    ///
    /// Stored as the raw `i32` value of
    /// `soda_recognition_result::FinalResultEndpointReason`.
    #[prost(enumeration="soda_recognition_result::FinalResultEndpointReason", optional, tag="3")]
    pub endpoint_reason: ::core::option::Option<i32>,
    /// Timing information for the event.
    #[prost(message, optional, tag="4")]
    pub timing_metrics: ::core::option::Option<TimingMetrics>,
}
/// Nested message and enum types in `SodaRecognitionResult`.
pub mod soda_recognition_result {
    /// Kind of result set, referenced by the `result_type` field of
    /// `SodaRecognitionResult`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum ResultType {
        /// Zero value of the enum; the result kind is not identified.
        Unknown = 0,
        /// Partial result of a speech segment so far.
        Partial = 1,
        /// Final result for this segment.
        Final = 2,
        /// Prefetch is only sent for likely query strings. This won't happen for
        /// non-query mode SODA, but we add here for completeness.
        Prefetch = 3,
    }
    /// Reason a recognition result was marked final, referenced by the
    /// `endpoint_reason` field of `SodaRecognitionResult`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum FinalResultEndpointReason {
        /// Zero value of the enum; the reason is not identified.
        EndpointUnknown = 0,
        /// End of speech from endpointer.
        EndpointEndOfSpeech = 1,
        /// End of utterance from endpointer.
        EndpointEndOfUtterance = 2,
        /// No more audio.
        EndpointEndOfAudio = 3,
        /// Final was generated because a hotword was detected.
        EndpointAsrResetByHotword = 4,
        /// ASR was reset via the external API.
        EndpointAsrResetExternal = 5,
        /// Final recognition result was generated due to an error in ASR.
        EndpointAsrError = 6,
    }
}
/// Endpoint event produced by SODA: which endpoint was detected, plus
/// optional timing data.
///
/// Next id: 3
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SodaEndpointEvent {
    /// Which endpoint type this event refers to.
    ///
    /// Stored as the raw `i32` value of `soda_endpoint_event::EndpointType`.
    /// The declared default is `Unknown`, which is numbered 4 in that enum
    /// rather than 0.
    #[prost(enumeration="soda_endpoint_event::EndpointType", optional, tag="1", default="Unknown")]
    pub endpoint_type: ::core::option::Option<i32>,
    /// Timing information for the event.
    #[prost(message, optional, tag="2")]
    pub timing_metrics: ::core::option::Option<TimingMetrics>,
}
/// Nested message and enum types in `SodaEndpointEvent`.
pub mod soda_endpoint_event {
    /// What endpoint type we're referring to here.
    ///
    /// Unlike the other enums in this file, the zero value is a real event
    /// (`StartOfSpeech`) and `Unknown` is numbered 4.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum EndpointType {
        /// A start-of-speech moment has been detected at this time. Audio currently
        /// contains speech.
        StartOfSpeech = 0,
        /// End of speech has been detected by the endpointer, audio does not contain
        /// speech right now.
        EndOfSpeech = 1,
        /// End of Audio due to an end-of-mic data event.
        EndOfAudio = 2,
        /// End of Utterance detected from the endpointer. Not used in
        /// Caption/Transcription.
        EndOfUtterance = 3,
        /// Endpoint type is not identified. This is the declared default of the
        /// `endpoint_type` field of `SodaEndpointEvent`.
        Unknown = 4,
    }
}
/// Audio level information reported by SODA. Carried in the
/// `audio_level_info` field of `SodaResponse`.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SodaAudioLevelInfo {
    /// Low-pass filtered RMS in range 0..1.
    #[prost(float, optional, tag="1")]
    pub rms: ::core::option::Option<f32>,
    /// Speech likelihood score in range 0..1.
    #[prost(float, optional, tag="2")]
    pub audio_level: ::core::option::Option<f32>,
    /// Amount of audio seen from start of SODA session until an audio level event.
    /// This value is only set when audio_level is set.
    #[prost(int64, optional, tag="3")]
    pub audio_time_usec: ::core::option::Option<i64>,
}
/// Language identification event reported by SODA. Carried in the
/// `langid_event` field of `SodaResponse`.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SodaLangIdEvent {
    /// Locale, e.g. "en-us" or "af-za"
    #[prost(string, optional, tag="1")]
    pub language: ::core::option::Option<::prost::alloc::string::String>,
    /// Equal to the internal enum from langid confidence.
    ///
    /// NOTE(review): carried as a plain `int32`; the enum it mirrors is not
    /// declared in this section of the file — confirm the valid values with
    /// the langid source.
    #[prost(int32, optional, tag="2")]
    pub confidence_level: ::core::option::Option<i32>,
}
/// Top-level response message from SODA.
///
/// `soda_type` says what kind of message this is; the payload fields below
/// are each documented as being set for one specific type.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SodaResponse {
    /// Kind of message this response represents.
    ///
    /// Stored as the raw `i32` value of `soda_response::SodaMessageType`;
    /// declared default: `Unknown`.
    #[prost(enumeration="soda_response::SodaMessageType", optional, tag="1", default="Unknown")]
    pub soda_type: ::core::option::Option<i32>,
    /// Set when type is RECOGNITION
    #[prost(message, optional, tag="2")]
    pub recognition_result: ::core::option::Option<SodaRecognitionResult>,
    /// Set when type is ENDPOINT
    #[prost(message, optional, tag="3")]
    pub endpoint_event: ::core::option::Option<SodaEndpointEvent>,
    /// Set when type is AUDIO_LEVEL
    #[prost(message, optional, tag="4")]
    pub audio_level_info: ::core::option::Option<SodaAudioLevelInfo>,
    /// Set when type is LANGID
    #[prost(message, optional, tag="5")]
    pub langid_event: ::core::option::Option<SodaLangIdEvent>,
}
/// Nested message and enum types in `SodaResponse`.
pub mod soda_response {
    /// Kind of message a `SodaResponse` represents, referenced by its
    /// `soda_type` field.
    ///
    /// `SodaResponse` declares a payload field for `Recognition`, `Endpoint`,
    /// `AudioLevel` and `Langid`; it declares none for `Stop`, `Shutdown` or
    /// `Start`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum SodaMessageType {
        /// Zero value of the enum and the declared default of `soda_type`.
        Unknown = 0,
        /// Recognition message; payload is in `recognition_result`.
        Recognition = 1,
        /// Stop message. NOTE(review): exact lifecycle meaning is not visible
        /// here — confirm against the .proto definition.
        Stop = 2,
        /// Shutdown message. NOTE(review): exact lifecycle meaning is not
        /// visible here — confirm against the .proto definition.
        Shutdown = 3,
        /// Start message. NOTE(review): exact lifecycle meaning is not visible
        /// here — confirm against the .proto definition.
        Start = 4,
        /// Endpoint message; payload is in `endpoint_event`.
        Endpoint = 5,
        /// Audio level message; payload is in `audio_level_info`.
        AudioLevel = 6,
        /// Language identification message; payload is in `langid_event`.
        Langid = 7,
    }
}