/* automatically generated by rust-bindgen */

pub type max_align_t = f64;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct ModelState {
    _unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct StreamingState {
    _unused: [u8; 0],
}
extern "C" {
    /// @brief Create an object providing an interface to a trained DeepSpeech model.
    ///
    /// @param aModelPath The path to the frozen model graph.
    /// @param aNCep The number of cepstrum the model was trained with.
    /// @param aNContext The context window the model was trained with.
    /// @param aAlphabetConfigPath The path to the configuration file specifying
    /// the alphabet used by the network. See alphabet.h.
    /// @param aBeamWidth The beam width used by the decoder. A larger beam
    /// width generates better results at the cost of decoding
    /// time.
    /// @param[out] retval a ModelState pointer
    ///
    /// @return Zero on success, non-zero on failure.
    #[link_name = "\u{1}__Z14DS_CreateModelPKcjjS0_jPP10ModelState"]
    pub fn DS_CreateModel(
        aModelPath: *const ::std::os::raw::c_char,
        aNCep: ::std::os::raw::c_uint,
        aNContext: ::std::os::raw::c_uint,
        aAlphabetConfigPath: *const ::std::os::raw::c_char,
        aBeamWidth: ::std::os::raw::c_uint,
        retval: *mut *mut ModelState,
    ) -> ::std::os::raw::c_int;
}
extern "C" {
    /// @brief Frees associated resources and destroys model object.
    #[link_name = "\u{1}__Z15DS_DestroyModelP10ModelState"]
    pub fn DS_DestroyModel(ctx: *mut ModelState);
}
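// Illustrative sketch (not generated by bindgen): loading and releasing a model
// through these bindings. The file paths and the hyper-parameters (26 cepstral
// coefficients, a context window of 9, beam width 500) are assumptions taken from
// typical DeepSpeech setups; use the values your model was actually trained with.
pub unsafe fn example_create_model() -> Option<*mut ModelState> {
    let model_path = ::std::ffi::CString::new("models/output_graph.pb").unwrap();
    let alphabet_path = ::std::ffi::CString::new("models/alphabet.txt").unwrap();
    let mut model: *mut ModelState = ::std::ptr::null_mut();
    let status = DS_CreateModel(
        model_path.as_ptr(),
        26,  // aNCep: assumed training value
        9,   // aNContext: assumed training value
        alphabet_path.as_ptr(),
        500, // aBeamWidth: assumed decoder setting
        &mut model,
    );
    if status == 0 && !model.is_null() {
        // The caller owns the model and must pass it to DS_DestroyModel when done.
        Some(model)
    } else {
        None
    }
}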
extern "C" {
    /// @brief Enable decoding using beam scoring with a KenLM language model.
    ///
    /// @param aCtx The ModelState pointer for the model being changed.
    /// @param aAlphabetConfigPath The path to the configuration file specifying
    /// the alphabet used by the network. See alphabet.h.
    /// @param aLMPath The path to the language model binary file.
    /// @param aTriePath The path to the trie file built from the same vocabulary
    /// as the language model binary.
    /// @param aLMWeight The weight to give to language model results when
    /// scoring.
    /// @param aValidWordCountWeight The weight (bonus) to give to beams when
    /// adding a new valid word to the decoding.
    ///
    /// @return Zero on success, non-zero on failure (invalid arguments).
    #[link_name = "\u{1}__Z22DS_EnableDecoderWithLMP10ModelStatePKcS2_S2_ff"]
    pub fn DS_EnableDecoderWithLM(
        aCtx: *mut ModelState,
        aAlphabetConfigPath: *const ::std::os::raw::c_char,
        aLMPath: *const ::std::os::raw::c_char,
        aTriePath: *const ::std::os::raw::c_char,
        aLMWeight: f32,
        aValidWordCountWeight: f32,
    ) -> ::std::os::raw::c_int;
}
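// Illustrative sketch (not generated by bindgen): enabling KenLM beam scoring on a
// model created as above. The file names and the two weights (1.75 / 1.0) are
// placeholder assumptions, not values prescribed by this header.
pub unsafe fn example_enable_lm(model: *mut ModelState) -> bool {
    let alphabet = ::std::ffi::CString::new("models/alphabet.txt").unwrap();
    let lm = ::std::ffi::CString::new("models/lm.binary").unwrap();
    let trie = ::std::ffi::CString::new("models/trie").unwrap();
    DS_EnableDecoderWithLM(
        model,
        alphabet.as_ptr(),
        lm.as_ptr(),
        trie.as_ptr(),
        1.75, // aLMWeight: placeholder
        1.0,  // aValidWordCountWeight: placeholder
    ) == 0
}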
extern "C" {
    /// @brief Use the DeepSpeech model to perform Speech-To-Text.
    ///
    /// @param aCtx The ModelState pointer for the model to use.
    /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate
    /// sample rate.
    /// @param aBufferSize The number of samples in the audio signal.
    /// @param aSampleRate The sample-rate of the audio signal.
    ///
    /// @return The STT result. The user is responsible for freeing the string.
    /// Returns NULL on error.
    #[link_name = "\u{1}__Z15DS_SpeechToTextP10ModelStatePKsjj"]
    pub fn DS_SpeechToText(
        aCtx: *mut ModelState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
    ) -> *mut ::std::os::raw::c_char;
}
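// Illustrative sketch (not generated by bindgen): one-shot transcription of a
// 16-bit mono buffer. The 16 kHz sample rate is an assumption; match whatever the
// model was trained on. The returned C string is copied into a Rust String here;
// the original pointer still has to be freed by the caller, and the freeing
// mechanism is left out because this header does not expose one.
pub unsafe fn example_speech_to_text(model: *mut ModelState, samples: &[i16]) -> Option<String> {
    let result = DS_SpeechToText(
        model,
        samples.as_ptr(),
        samples.len() as ::std::os::raw::c_uint,
        16_000, // assumed sample rate
    );
    if result.is_null() {
        return None;
    }
    let text = ::std::ffi::CStr::from_ptr(result).to_string_lossy().into_owned();
    // NOTE: `result` must still be released with the allocator the library used.
    Some(text)
}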
extern "C" {
    /// @brief Create a new streaming inference state. The streaming state returned
    /// by this function can then be passed to {@link DS_FeedAudioContent()}
    /// and {@link DS_FinishStream()}.
    ///
    /// @param aCtx The ModelState pointer for the model to use.
    /// @param aPreAllocFrames Number of timestep frames to reserve. One timestep
    /// is equivalent to two window lengths (20ms). If set to
    /// 0 we reserve enough frames for 3 seconds of audio (150).
    /// @param aSampleRate The sample-rate of the audio signal.
    /// @param[out] retval an opaque pointer that represents the streaming state. Can
    /// be NULL if an error occurs.
    ///
    /// @return Zero for success, non-zero on failure.
    #[link_name = "\u{1}__Z14DS_SetupStreamP10ModelStatejjPP14StreamingState"]
    pub fn DS_SetupStream(
        aCtx: *mut ModelState,
        aPreAllocFrames: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
        retval: *mut *mut StreamingState,
    ) -> ::std::os::raw::c_int;
}
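// Illustrative sketch (not generated by bindgen): opening a streaming inference
// state. Pre-allocating 150 frames (~3 s, per the comment above) and a 16 kHz
// sample rate are assumptions, not requirements.
pub unsafe fn example_setup_stream(model: *mut ModelState) -> Option<*mut StreamingState> {
    let mut stream: *mut StreamingState = ::std::ptr::null_mut();
    if DS_SetupStream(model, 150, 16_000, &mut stream) == 0 && !stream.is_null() {
        // The stream must eventually be consumed by DS_FinishStream or DS_DiscardStream.
        Some(stream)
    } else {
        None
    }
}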
extern "C" {
    /// @brief Feed audio samples to an ongoing streaming inference.
    ///
    /// @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
    /// @param aBuffer An array of 16-bit, mono raw audio samples at the
    /// appropriate sample rate.
    /// @param aBufferSize The number of samples in @p aBuffer.
    #[link_name = "\u{1}__Z19DS_FeedAudioContentP14StreamingStatePKsj"]
    pub fn DS_FeedAudioContent(
        aSctx: *mut StreamingState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
    );
}
extern "C" {
    /// @brief Compute the intermediate decoding of an ongoing streaming inference.
    /// This is an expensive process as the decoder implementation isn't
    /// currently capable of streaming, so it always starts from the beginning
    /// of the audio.
    ///
    /// @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
    ///
    /// @return The STT intermediate result. The user is responsible for freeing the
    /// string.
    #[link_name = "\u{1}__Z21DS_IntermediateDecodeP14StreamingState"]
    pub fn DS_IntermediateDecode(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;
}
extern "C" {
    /// @brief Signal the end of an audio signal to an ongoing streaming
    /// inference and return the STT result over the whole audio signal.
    ///
    /// @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
    ///
    /// @return The STT result. The user is responsible for freeing the string.
    ///
    /// @note This method will free the state pointer (@p aSctx).
    #[link_name = "\u{1}__Z15DS_FinishStreamP14StreamingState"]
    pub fn DS_FinishStream(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;
}
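// Illustrative sketch (not generated by bindgen): feeding audio in chunks and
// finishing the stream. The 4096-sample chunk size is arbitrary. DS_FinishStream
// frees the streaming state, so `stream` must not be used afterwards; to abandon a
// stream without decoding, call DS_DiscardStream instead. As above, releasing the
// returned C string is left to the caller.
pub unsafe fn example_stream_transcribe(stream: *mut StreamingState, audio: &[i16]) -> Option<String> {
    for chunk in audio.chunks(4096) {
        DS_FeedAudioContent(stream, chunk.as_ptr(), chunk.len() as ::std::os::raw::c_uint);
    }
    let result = DS_FinishStream(stream); // consumes and frees `stream`
    if result.is_null() {
        return None;
    }
    let text = ::std::ffi::CStr::from_ptr(result).to_string_lossy().into_owned();
    // NOTE: `result` must still be released with the allocator the library used.
    Some(text)
}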
extern "C" {
    /// @brief Destroy a streaming state without decoding the computed logits. This
    /// can be used if you no longer need the result of an ongoing streaming
    /// inference and don't want to perform a costly decode operation.
    ///
    /// @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
    ///
    /// @note This method will free the state pointer (@p aSctx).
    #[link_name = "\u{1}__Z16DS_DiscardStreamP14StreamingState"]
    pub fn DS_DiscardStream(aSctx: *mut StreamingState);
}
extern "C" {
    /// @brief Given audio, return a vector suitable for input to a DeepSpeech
    /// model trained with the given parameters.
    ///
    /// Extracts MFCC features from a given audio signal and adds the appropriate
    /// amount of context to run inference on a DeepSpeech model trained with
    /// the given parameters.
    ///
    /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
    /// rate.
    /// @param aBufferSize The number of samples in the audio signal.
    /// @param aSampleRate The sample-rate of the audio signal.
    /// @param aNCep The number of cepstrum.
    /// @param aNContext The size of the context window.
    /// @param[out] aMfcc An array containing features, of shape
    /// (@p aNFrames, ncep * ncontext). The user is responsible
    /// for freeing the array.
    /// @param[out] aNFrames (optional) The number of frames in @p aMfcc.
    /// @param[out] aFrameLen (optional) The length of each frame
    /// (ncep * ncontext) in @p aMfcc.
    #[link_name = "\u{1}__Z21DS_AudioToInputVectorPKsjjjjPPfPiS3_"]
    pub fn DS_AudioToInputVector(
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
        aNCep: ::std::os::raw::c_uint,
        aNContext: ::std::os::raw::c_uint,
        aMfcc: *mut *mut f32,
        aNFrames: *mut ::std::os::raw::c_int,
        aFrameLen: *mut ::std::os::raw::c_int,
    );
}
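// Illustrative sketch (not generated by bindgen): extracting MFCC features plus
// context without running inference. The cepstrum/context values (26 / 9) and the
// 16 kHz sample rate are assumptions. The feature buffer is copied into a Vec;
// like the strings above, the original allocation still has to be freed by the
// caller.
pub unsafe fn example_audio_to_features(samples: &[i16]) -> Vec<f32> {
    let mut mfcc: *mut f32 = ::std::ptr::null_mut();
    let mut n_frames: ::std::os::raw::c_int = 0;
    let mut frame_len: ::std::os::raw::c_int = 0;
    DS_AudioToInputVector(
        samples.as_ptr(),
        samples.len() as ::std::os::raw::c_uint,
        16_000, // assumed sample rate
        26,     // aNCep: assumed value
        9,      // aNContext: assumed value
        &mut mfcc,
        &mut n_frames,
        &mut frame_len,
    );
    if mfcc.is_null() {
        return Vec::new();
    }
    let len = n_frames as usize * frame_len as usize;
    let features = ::std::slice::from_raw_parts(mfcc, len).to_vec();
    // NOTE: `mfcc` must still be released with the allocator the library used.
    features
}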
extern "C" {
    /// @brief Print version of this library and of the linked TensorFlow library.
    #[link_name = "\u{1}__Z16DS_PrintVersionsv"]
    pub fn DS_PrintVersions();
}