1#![allow(non_snake_case)]
3#![allow(non_camel_case_types)]
4#![allow(non_upper_case_globals)]
5#![allow(deref_nullptr)]
6
/// Opaque stand-in for the native model object.
///
/// The only field is a zero-length byte array, so the type holds no data on
/// the Rust side; the foreign functions declared in this file refer to it
/// exclusively through raw pointers.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ModelState {
    // Zero-length placeholder: gives the struct a C-compatible definition
    // without exposing (or guessing at) the native layout.
    _unused: [u8; 0],
}
12
/// Opaque stand-in for the native streaming-inference state.
///
/// The only field is a zero-length byte array, so the type holds no data on
/// the Rust side; the foreign functions declared in this file refer to it
/// exclusively through raw pointers.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct StreamingState {
    // Zero-length placeholder: gives the struct a C-compatible definition
    // without exposing (or guessing at) the native layout.
    _unused: [u8; 0],
}
18
/// Stores the text of an individual token, along with its timing information.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct TokenMetadata {
    /// The text corresponding to this token.
    pub text: *const ::std::os::raw::c_char,
    /// Position of the token in units of 20ms.
    pub timestep: ::std::os::raw::c_uint,
    /// Position of the token in seconds.
    pub start_time: f32,
}
30
31#[doc = " @brief A single transcript computed by the model, including a confidence"]
32#[doc = " value and the metadata for its constituent tokens."]
33#[repr(C)]
34#[derive(Debug, Copy, Clone)]
35pub struct CandidateTranscript {
36 #[doc = " Array of TokenMetadata objects"]
37 pub tokens: *const TokenMetadata,
38 #[doc = " Size of the tokens array"]
39 pub num_tokens: ::std::os::raw::c_uint,
40 #[doc = " Approximated confidence value for this transcript. This is roughly the"]
41 #[doc = " sum of the acoustic model logit values for each timestep/character that"]
42 #[doc = " contributed to the creation of this transcript."]
43 pub confidence: f64,
44}
45
46#[doc = " @brief An array of CandidateTranscript objects computed by the model."]
47#[repr(C)]
48#[derive(Debug, Copy, Clone)]
49pub struct Metadata {
50 #[doc = " Array of CandidateTranscript objects"]
51 pub transcripts: *const CandidateTranscript,
52 #[doc = " Size of the transcripts array"]
53 pub num_transcripts: ::std::os::raw::c_uint,
54}
55
/// Integer type of the `STT_Error_Codes_*` constants below.
pub type STT_Error_Codes = ::std::os::raw::c_uint;

// The values are written in hexadecimal because they fall into three visible
// ranges (0x1000, 0x2000, 0x3000) besides zero.

// Zero.
pub const STT_Error_Codes_STT_ERR_OK: STT_Error_Codes = 0x0000;

// 0x1000 range.
pub const STT_Error_Codes_STT_ERR_NO_MODEL: STT_Error_Codes = 0x1000;

// 0x2000 range: `INVALID_*`, `MODEL_INCOMPATIBLE` and `SCORER_*` codes.
pub const STT_Error_Codes_STT_ERR_INVALID_ALPHABET: STT_Error_Codes = 0x2000;
pub const STT_Error_Codes_STT_ERR_INVALID_SHAPE: STT_Error_Codes = 0x2001;
pub const STT_Error_Codes_STT_ERR_INVALID_SCORER: STT_Error_Codes = 0x2002;
pub const STT_Error_Codes_STT_ERR_MODEL_INCOMPATIBLE: STT_Error_Codes = 0x2003;
pub const STT_Error_Codes_STT_ERR_SCORER_NOT_ENABLED: STT_Error_Codes = 0x2004;
pub const STT_Error_Codes_STT_ERR_SCORER_UNREADABLE: STT_Error_Codes = 0x2005;
pub const STT_Error_Codes_STT_ERR_SCORER_INVALID_LM: STT_Error_Codes = 0x2006;
pub const STT_Error_Codes_STT_ERR_SCORER_NO_TRIE: STT_Error_Codes = 0x2007;
pub const STT_Error_Codes_STT_ERR_SCORER_INVALID_TRIE: STT_Error_Codes = 0x2008;
pub const STT_Error_Codes_STT_ERR_SCORER_VERSION_MISMATCH: STT_Error_Codes = 0x2009;

// 0x3000 range: `FAIL_*` codes.
pub const STT_Error_Codes_STT_ERR_FAIL_INIT_MMAP: STT_Error_Codes = 0x3000;
pub const STT_Error_Codes_STT_ERR_FAIL_INIT_SESS: STT_Error_Codes = 0x3001;
pub const STT_Error_Codes_STT_ERR_FAIL_INTERPRETER: STT_Error_Codes = 0x3002;
pub const STT_Error_Codes_STT_ERR_FAIL_RUN_SESS: STT_Error_Codes = 0x3003;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_STREAM: STT_Error_Codes = 0x3004;
pub const STT_Error_Codes_STT_ERR_FAIL_READ_PROTOBUF: STT_Error_Codes = 0x3005;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_SESS: STT_Error_Codes = 0x3006;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_MODEL: STT_Error_Codes = 0x3007;
pub const STT_Error_Codes_STT_ERR_FAIL_INSERT_HOTWORD: STT_Error_Codes = 0x3008;
pub const STT_Error_Codes_STT_ERR_FAIL_CLEAR_HOTWORD: STT_Error_Codes = 0x3009;
// NOTE(review): the value jumps from 0x3009 to 0x3010 (not 0x300A). Kept
// exactly as found; presumably it mirrors the native header -- confirm.
pub const STT_Error_Codes_STT_ERR_FAIL_ERASE_HOTWORD: STT_Error_Codes = 0x3010;
80
81#[cfg_attr(not(target_os = "windows"), link(name = "stt"))]
82#[cfg_attr(target_os = "windows", link(name = "libstt.so.if"))]
83extern "C" {
84 #[doc = " @brief An object providing an interface to a trained Coqui STT model."]
85 #[doc = ""]
86 #[doc = " @param aModelPath The path to the frozen model graph."]
87 #[doc = " @param[out] retval a ModelState pointer"]
88 #[doc = ""]
89 #[doc = " @return Zero on success, non-zero on failure."]
90 pub fn STT_CreateModel(
91 aModelPath: *const ::std::os::raw::c_char,
92 retval: *mut *mut ModelState,
93 ) -> ::std::os::raw::c_int;
94
95 #[doc = " @brief An object providing an interface to a trained Coqui STT model, loaded from a buffer."]
96 #[doc = ""]
97 #[doc = " @param aModelBuffer The buffer containing the content of the exported model."]
98 #[doc = " @param aBufferSize Size of model buffer."]
99 #[doc = " @param[out] retval a ModelState pointer"]
100 #[doc = ""]
101 #[doc = " @return Zero on success, non-zero on failure."]
102 #[cfg(not(target_os = "windows"))]
103 pub fn STT_CreateModelFromBuffer(
104 aModelBuffer: *const ::std::os::raw::c_char,
105 aBufferSize: ::std::os::raw::c_uint,
106 retval: *mut *mut ModelState,
107 ) -> ::std::os::raw::c_int;
108
109 #[doc = " @brief Get beam width value used by the model. If {@link STT_SetModelBeamWidth}"]
110 #[doc = " was not called before, will return the default value loaded from the"]
111 #[doc = " model file."]
112 #[doc = ""]
113 #[doc = " @param aCtx A ModelState pointer created with {@link STT_CreateModel}."]
114 #[doc = ""]
115 #[doc = " @return Beam width value used by the model."]
116 pub fn STT_GetModelBeamWidth(aCtx: *const ModelState) -> ::std::os::raw::c_uint;
117
118 #[doc = " @brief Set beam width value used by the model."]
119 #[doc = ""]
120 #[doc = " @param aCtx A ModelState pointer created with {@link STT_CreateModel}."]
121 #[doc = " @param aBeamWidth The beam width used by the model. A larger beam width value"]
122 #[doc = " generates better results at the cost of decoding time."]
123 #[doc = ""]
124 #[doc = " @return Zero on success, non-zero on failure."]
125 pub fn STT_SetModelBeamWidth(
126 aCtx: *mut ModelState,
127 aBeamWidth: ::std::os::raw::c_uint,
128 ) -> ::std::os::raw::c_int;
129
130 #[doc = " @brief Return the sample rate expected by a model."]
131 #[doc = ""]
132 #[doc = " @param aCtx A ModelState pointer created with {@link STT_CreateModel}."]
133 #[doc = ""]
134 #[doc = " @return Sample rate expected by the model for its input."]
135 pub fn STT_GetModelSampleRate(aCtx: *const ModelState) -> ::std::os::raw::c_int;
136
137 #[doc = " @brief Frees associated resources and destroys model object."]
138 pub fn STT_FreeModel(ctx: *mut ModelState);
139
140 #[doc = " @brief Enable decoding using an external scorer."]
141 #[doc = ""]
142 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
143 #[doc = " @param aScorerPath The path to the external scorer file."]
144 #[doc = ""]
145 #[doc = " @return Zero on success, non-zero on failure (invalid arguments)."]
146 pub fn STT_EnableExternalScorer(
147 aCtx: *mut ModelState,
148 aScorerPath: *const ::std::os::raw::c_char,
149 ) -> ::std::os::raw::c_int;
150
151 #[doc = " @brief Enable decoding using an external scorer loaded from a buffer."]
152 #[doc = ""]
153 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
154 #[doc = " @param aScorerBuffer The buffer containing the content of an external-scorer file."]
155 #[doc = " @param aBufferSize Size of scorer buffer."]
156 #[doc = ""]
157 #[doc = " @return Zero on success, non-zero on failure (invalid arguments)."]
158 #[cfg(not(target_os = "windows"))]
159 pub fn STT_EnableExternalScorerFromBuffer(
160 aCtx: *mut ModelState,
161 aScorerBuffer: *const ::std::os::raw::c_char,
162 aBufferSize: ::std::os::raw::c_uint,
163 ) -> ::std::os::raw::c_int;
164
165 #[doc = " @brief Add a hot-word and its boost."]
166 #[doc = ""]
167 #[doc = " Words that don't occur in the scorer (e.g. proper nouns) or strings that contain spaces won't be taken into account."]
168 #[doc = ""]
169 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
170 #[doc = " @param word The hot-word."]
171 #[doc = " @param boost The boost. Positive value increases and negative reduces chance of a word occuring in a transcription. Excessive positive boost might lead to splitting up of letters of the word following the hot-word."]
172 #[doc = ""]
173 #[doc = " @return Zero on success, non-zero on failure (invalid arguments)."]
174 pub fn STT_AddHotWord(
175 aCtx: *mut ModelState,
176 word: *const ::std::os::raw::c_char,
177 boost: f32,
178 ) -> ::std::os::raw::c_int;
179
180 #[doc = " @brief Remove entry for a hot-word from the hot-words map."]
181 #[doc = ""]
182 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
183 #[doc = " @param word The hot-word."]
184 #[doc = ""]
185 #[doc = " @return Zero on success, non-zero on failure (invalid arguments)."]
186 pub fn STT_EraseHotWord(
187 aCtx: *mut ModelState,
188 word: *const ::std::os::raw::c_char,
189 ) -> ::std::os::raw::c_int;
190
191 #[doc = " @brief Removes all elements from the hot-words map."]
192 #[doc = ""]
193 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
194 #[doc = ""]
195 #[doc = " @return Zero on success, non-zero on failure (invalid arguments)."]
196 pub fn STT_ClearHotWords(aCtx: *mut ModelState) -> ::std::os::raw::c_int;
197
198 #[doc = " @brief Disable decoding using an external scorer."]
199 #[doc = ""]
200 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
201 #[doc = ""]
202 #[doc = " @return Zero on success, non-zero on failure."]
203 pub fn STT_DisableExternalScorer(aCtx: *mut ModelState) -> ::std::os::raw::c_int;
204
205 #[doc = " @brief Set hyperparameters alpha and beta of the external scorer."]
206 #[doc = ""]
207 #[doc = " @param aCtx The ModelState pointer for the model being changed."]
208 #[doc = " @param aAlpha The alpha hyperparameter of the decoder. Language model weight."]
209 #[doc = " @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight."]
210 #[doc = ""]
211 #[doc = " @return Zero on success, non-zero on failure."]
212 pub fn STT_SetScorerAlphaBeta(
213 aCtx: *mut ModelState,
214 aAlpha: f32,
215 aBeta: f32,
216 ) -> ::std::os::raw::c_int;
217
218 #[doc = " @brief Use the Coqui STT model to convert speech to text."]
219 #[doc = ""]
220 #[doc = " @param aCtx The ModelState pointer for the model to use."]
221 #[doc = " @param aBuffer A 16-bit, mono raw audio signal at the appropriate"]
222 #[doc = " sample rate (matching what the model was trained on)."]
223 #[doc = " @param aBufferSize The number of samples in the audio signal."]
224 #[doc = ""]
225 #[doc = " @return The STT result. The user is responsible for freeing the string using"]
226 #[doc = " {@link STT_FreeString()}. Returns NULL on error."]
227 pub fn STT_SpeechToText(
228 aCtx: *mut ModelState,
229 aBuffer: *const ::std::os::raw::c_short,
230 aBufferSize: ::std::os::raw::c_uint,
231 ) -> *mut ::std::os::raw::c_char;
232
233 #[doc = " @brief Use the Coqui STT model to convert speech to text and output results"]
234 #[doc = " including metadata."]
235 #[doc = ""]
236 #[doc = " @param aCtx The ModelState pointer for the model to use."]
237 #[doc = " @param aBuffer A 16-bit, mono raw audio signal at the appropriate"]
238 #[doc = " sample rate (matching what the model was trained on)."]
239 #[doc = " @param aBufferSize The number of samples in the audio signal."]
240 #[doc = " @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this."]
241 #[doc = ""]
242 #[doc = " @return Metadata struct containing multiple CandidateTranscript structs. Each"]
243 #[doc = " transcript has per-token metadata including timing information. The"]
244 #[doc = " user is responsible for freeing Metadata by calling {@link STT_FreeMetadata()}."]
245 #[doc = " Returns NULL on error."]
246 pub fn STT_SpeechToTextWithMetadata(
247 aCtx: *mut ModelState,
248 aBuffer: *const ::std::os::raw::c_short,
249 aBufferSize: ::std::os::raw::c_uint,
250 aNumResults: ::std::os::raw::c_uint,
251 ) -> *mut Metadata;
252
253 #[doc = " @brief Create a new streaming inference state. The streaming state returned"]
254 #[doc = " by this function can then be passed to {@link STT_FeedAudioContent()}"]
255 #[doc = " and {@link STT_FinishStream()}."]
256 #[doc = ""]
257 #[doc = " @param aCtx The ModelState pointer for the model to use."]
258 #[doc = " @param[out] retval an opaque pointer that represents the streaming state. Can"]
259 #[doc = " be NULL if an error occurs."]
260 #[doc = ""]
261 #[doc = " @return Zero for success, non-zero on failure."]
262 pub fn STT_CreateStream(
263 aCtx: *mut ModelState,
264 retval: *mut *mut StreamingState,
265 ) -> ::std::os::raw::c_int;
266
267 #[doc = " @brief Feed audio samples to an ongoing streaming inference."]
268 #[doc = ""]
269 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
270 #[doc = " @param aBuffer An array of 16-bit, mono raw audio samples at the"]
271 #[doc = " appropriate sample rate (matching what the model was trained on)."]
272 #[doc = " @param aBufferSize The number of samples in @p aBuffer."]
273 pub fn STT_FeedAudioContent(
274 aSctx: *mut StreamingState,
275 aBuffer: *const ::std::os::raw::c_short,
276 aBufferSize: ::std::os::raw::c_uint,
277 );
278
279 #[doc = " @brief Compute the intermediate decoding of an ongoing streaming inference."]
280 #[doc = ""]
281 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
282 #[doc = ""]
283 #[doc = " @return The STT intermediate result. The user is responsible for freeing the"]
284 #[doc = " string using {@link STT_FreeString()}."]
285 pub fn STT_IntermediateDecode(aSctx: *const StreamingState) -> *mut ::std::os::raw::c_char;
286
287 #[doc = " @brief Compute the intermediate decoding of an ongoing streaming inference,"]
288 #[doc = " return results including metadata."]
289 #[doc = ""]
290 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
291 #[doc = " @param aNumResults The number of candidate transcripts to return."]
292 #[doc = ""]
293 #[doc = " @return Metadata struct containing multiple candidate transcripts. Each transcript"]
294 #[doc = " has per-token metadata including timing information. The user is"]
295 #[doc = " responsible for freeing Metadata by calling {@link STT_FreeMetadata()}."]
296 #[doc = " Returns NULL on error."]
297 pub fn STT_IntermediateDecodeWithMetadata(
298 aSctx: *const StreamingState,
299 aNumResults: ::std::os::raw::c_uint,
300 ) -> *mut Metadata;
301
302 #[doc = " @brief EXPERIMENTAL: Compute the intermediate decoding of an ongoing streaming"]
303 #[doc = " inference, flushing buffers first. This ensures that all audio that"]
304 #[doc = " has been streamed so far is included in the result, but is more expensive"]
305 #[doc = " than STT_IntermediateDecode() because buffers are processed through"]
306 #[doc = " the acoustic model. Calling this function too often will also degrade"]
307 #[doc = " transcription accuracy due to trashing of the LSTM hidden state vectors."]
308 #[doc = ""]
309 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
310 #[doc = ""]
311 #[doc = " @return The STT result. The user is responsible for freeing the string using"]
312 #[doc = " {@link STT_FreeString()}."]
313 #[doc = ""]
314 #[doc = " @note This method will free the state pointer (@p aSctx)."]
315 pub fn STT_IntermediateDecodeFlushBuffers(
316 aSctx: *mut StreamingState,
317 ) -> *mut ::std::os::raw::c_char;
318
319 #[doc = " @brief EXPERIMENTAL: Compute the intermediate decoding of an ongoing streaming"]
320 #[doc = " inference, flushing buffers first. This ensures that all audio that"]
321 #[doc = " has been streamed so far is included in the result, but is more expensive"]
322 #[doc = " than STT_IntermediateDecodeWithMetadata() because buffers are processed"]
323 #[doc = " through the acoustic model. Calling this function too often will also"]
324 #[doc = " degrade transcription accuracy due to trashing of the LSTM hidden state"]
325 #[doc = " vectors. Returns results including metadata."]
326 #[doc = ""]
327 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
328 #[doc = " @param aNumResults The number of candidate transcripts to return."]
329 #[doc = ""]
330 #[doc = " @return Metadata struct containing multiple candidate transcripts. Each transcript"]
331 #[doc = " has per-token metadata including timing information. The user is"]
332 #[doc = " responsible for freeing Metadata by calling {@link STT_FreeMetadata()}."]
333 #[doc = " Returns NULL on error."]
334 pub fn STT_IntermediateDecodeWithMetadataFlushBuffers(
335 aSctx: *mut StreamingState,
336 aNumResults: ::std::os::raw::c_uint,
337 ) -> *mut Metadata;
338
339 #[doc = " @brief Compute the final decoding of an ongoing streaming inference and return"]
340 #[doc = " the result. Signals the end of an ongoing streaming inference."]
341 #[doc = ""]
342 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
343 #[doc = ""]
344 #[doc = " @return The STT result. The user is responsible for freeing the string using"]
345 #[doc = " {@link STT_FreeString()}."]
346 #[doc = ""]
347 #[doc = " @note This method will free the state pointer (@p aSctx)."]
348 pub fn STT_FinishStream(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;
349
350 #[doc = " @brief Compute the final decoding of an ongoing streaming inference and return"]
351 #[doc = " results including metadata. Signals the end of an ongoing streaming"]
352 #[doc = " inference."]
353 #[doc = ""]
354 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
355 #[doc = " @param aNumResults The number of candidate transcripts to return."]
356 #[doc = ""]
357 #[doc = " @return Metadata struct containing multiple candidate transcripts. Each transcript"]
358 #[doc = " has per-token metadata including timing information. The user is"]
359 #[doc = " responsible for freeing Metadata by calling {@link STT_FreeMetadata()}."]
360 #[doc = " Returns NULL on error."]
361 #[doc = ""]
362 #[doc = " @note This method will free the state pointer (@p aSctx)."]
363 pub fn STT_FinishStreamWithMetadata(
364 aSctx: *mut StreamingState,
365 aNumResults: ::std::os::raw::c_uint,
366 ) -> *mut Metadata;
367
368 #[doc = " @brief Destroy a streaming state without decoding the computed logits. This"]
369 #[doc = " can be used if you no longer need the result of an ongoing streaming"]
370 #[doc = " inference and don't want to perform a costly decode operation."]
371 #[doc = ""]
372 #[doc = " @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}."]
373 #[doc = ""]
374 #[doc = " @note This method will free the state pointer (@p aSctx)."]
375 pub fn STT_FreeStream(aSctx: *mut StreamingState);
376
377 #[doc = " @brief Free memory allocated for metadata information."]
378 pub fn STT_FreeMetadata(m: *mut Metadata);
379
380 #[doc = " @brief Free a char* string returned by the Coqui STT API."]
381 pub fn STT_FreeString(str_: *mut ::std::os::raw::c_char);
382
383 #[doc = " @brief Returns the version of this library. The returned version is a semantic"]
384 #[doc = " version (SemVer 2.0.0). The string returned must be freed with {@link STT_FreeString()}."]
385 #[doc = ""]
386 #[doc = " @return The version string."]
387 pub fn STT_Version() -> *mut ::std::os::raw::c_char;
388
389 #[doc = " @brief Returns a textual description corresponding to an error code."]
390 #[doc = " The string returned must be freed with @{link STT_FreeString()}."]
391 #[doc = ""]
392 #[doc = " @return The error description."]
393 pub fn STT_ErrorCodeToErrorMessage(
394 aErrorCode: ::std::os::raw::c_int,
395 ) -> *mut ::std::os::raw::c_char;
396}