/* automatically generated by rust-bindgen */
/// Type alias emitted by the binding generator; presumably stands in for the
/// C `max_align_t` type.
// NOTE(review): aliased to `f64` here; the size and alignment of the C type are
// platform-dependent, so confirm before relying on this alias for layout.
pub type max_align_t = f64;
/// Opaque model handle.
///
/// The declarations visible in these bindings only ever refer to this type
/// through raw pointers (`*mut ModelState`). Its single private, zero-length
/// field keeps the struct from being built or inspected outside this module.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct ModelState {
    _unused: [u8; 0],
}
/// Opaque streaming-inference handle.
///
/// The declarations visible in these bindings only ever refer to this type
/// through raw pointers (`*mut StreamingState`). Its single private,
/// zero-length field keeps the struct from being built or inspected outside
/// this module.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct StreamingState {
    _unused: [u8; 0],
}
extern "C" {
    /// @brief Creates an object that provides an interface to a trained
    /// DeepSpeech model.
    ///
    /// @param aModelPath Path to the frozen model graph.
    /// @param aNCep Number of cepstrum the model was trained with.
    /// @param aNContext Context window the model was trained with.
    /// @param aAlphabetConfigPath Path to the configuration file that specifies
    /// the alphabet used by the network. See alphabet.h.
    /// @param aBeamWidth Beam width used by the decoder. A larger beam width
    /// generates better results at the cost of decoding time.
    /// @param[out] retval Receives a ModelState pointer.
    ///
    /// @return Zero on success, non-zero on failure.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    // NOTE(review): both path arguments are presumably NUL-terminated C
    // strings; verify against the C header.
    #[link_name = "\u{1}__Z14DS_CreateModelPKcjjS0_jPP10ModelState"]
    pub fn DS_CreateModel(
        aModelPath: *const ::std::os::raw::c_char,
        aNCep: ::std::os::raw::c_uint,
        aNContext: ::std::os::raw::c_uint,
        aAlphabetConfigPath: *const ::std::os::raw::c_char,
        aBeamWidth: ::std::os::raw::c_uint,
        retval: *mut *mut ModelState,
    ) -> ::std::os::raw::c_int;
}
extern "C" {
    /// @brief Destroys the model object and frees the resources associated
    /// with it.
    ///
    /// @param ctx The ModelState pointer to destroy.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    // NOTE(review): presumably `ctx` must not be used after this call; confirm
    // against the C header.
    #[link_name = "\u{1}__Z15DS_DestroyModelP10ModelState"]
    pub fn DS_DestroyModel(ctx: *mut ModelState);
}
extern "C" {
    /// @brief Turns on decoding with beam scoring backed by a KenLM language
    /// model.
    ///
    /// @param aCtx The ModelState pointer for the model being changed.
    /// @param aAlphabetConfigPath Path to the configuration file that specifies
    /// the alphabet used by the network. See alphabet.h.
    /// @param aLMPath Path to the language model binary file.
    /// @param aTriePath Path to the trie file built from the same vocabulary as
    /// the language model binary.
    /// @param aLMWeight Weight given to language model results when scoring.
    /// @param aValidWordCountWeight Weight (bonus) given to beams when a new
    /// valid word is added to the decoding.
    ///
    /// @return Zero on success, non-zero on failure (invalid arguments).
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z22DS_EnableDecoderWithLMP10ModelStatePKcS2_S2_ff"]
    pub fn DS_EnableDecoderWithLM(
        aCtx: *mut ModelState,
        aAlphabetConfigPath: *const ::std::os::raw::c_char,
        aLMPath: *const ::std::os::raw::c_char,
        aTriePath: *const ::std::os::raw::c_char,
        aLMWeight: f32,
        aValidWordCountWeight: f32,
    ) -> ::std::os::raw::c_int;
}
extern "C" {
    /// @brief Runs Speech-To-Text on an audio buffer using the DeepSpeech
    /// model.
    ///
    /// @param aCtx The ModelState pointer for the model to use.
    /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
    /// rate.
    /// @param aBufferSize Number of samples in the audio signal.
    /// @param aSampleRate Sample rate of the audio signal.
    ///
    /// @return The STT result, or NULL on error. The user is responsible for
    /// freeing the string.
    // NOTE(review): no string-release function is declared in the visible
    // bindings; confirm how the returned string has to be freed.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z15DS_SpeechToTextP10ModelStatePKsjj"]
    pub fn DS_SpeechToText(
        aCtx: *mut ModelState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
    ) -> *mut ::std::os::raw::c_char;
}
extern "C" {
    /// @brief Creates a new streaming inference state. The returned streaming
    /// state can then be handed to {@link DS_FeedAudioContent()} and
    /// {@link DS_FinishStream()}.
    ///
    /// @param aCtx The ModelState pointer for the model to use.
    /// @param aPreAllocFrames Number of timestep frames to reserve. One
    /// timestep is equivalent to two window lengths (20ms). If set to 0, enough
    /// frames for 3 seconds of audio (150) are reserved.
    /// @param aSampleRate Sample rate of the audio signal.
    /// @param[out] retval Receives an opaque pointer that represents the
    /// streaming state. Can be NULL if an error occurs.
    ///
    /// @return Zero for success, non-zero on failure.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z14DS_SetupStreamP10ModelStatejjPP14StreamingState"]
    pub fn DS_SetupStream(
        aCtx: *mut ModelState,
        aPreAllocFrames: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
        retval: *mut *mut StreamingState,
    ) -> ::std::os::raw::c_int;
}
extern "C" {
    /// @brief Supplies audio samples to an ongoing streaming inference.
    ///
    /// @param aSctx A streaming state pointer returned by
    /// {@link DS_SetupStream()}.
    /// @param aBuffer An array of 16-bit, mono raw audio samples at the
    /// appropriate sample rate.
    /// @param aBufferSize Number of samples in @p aBuffer.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z19DS_FeedAudioContentP14StreamingStatePKsj"]
    pub fn DS_FeedAudioContent(
        aSctx: *mut StreamingState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
    );
}
extern "C" {
    /// @brief Computes the intermediate decoding of an ongoing streaming
    /// inference. This is expensive: the decoder implementation isn't currently
    /// capable of streaming, so it always starts from the beginning of the
    /// audio.
    ///
    /// @param aSctx A streaming state pointer returned by
    /// {@link DS_SetupStream()}.
    ///
    /// @return The STT intermediate result. The user is responsible for freeing
    /// the string.
    // NOTE(review): no string-release function is declared in the visible
    // bindings; confirm how the returned string has to be freed.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z21DS_IntermediateDecodeP14StreamingState"]
    pub fn DS_IntermediateDecode(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;
}
extern "C" {
    /// @brief Signals the end of an audio signal to an ongoing streaming
    /// inference and returns the STT result over the whole audio signal.
    ///
    /// @param aSctx A streaming state pointer returned by
    /// {@link DS_SetupStream()}.
    ///
    /// @return The STT result. The user is responsible for freeing the string.
    ///
    /// @note This method will free the state pointer (@p aSctx), so the pointer
    /// must not be used again afterwards.
    // NOTE(review): no string-release function is declared in the visible
    // bindings; confirm how the returned string has to be freed.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z15DS_FinishStreamP14StreamingState"]
    pub fn DS_FinishStream(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;
}
extern "C" {
    /// @brief Destroys a streaming state without decoding the computed logits.
    /// Use this when the result of an ongoing streaming inference is no longer
    /// needed and a costly decode operation should be avoided.
    ///
    /// @param aSctx A streaming state pointer returned by
    /// {@link DS_SetupStream()}.
    ///
    /// @note This method will free the state pointer (@p aSctx), so the pointer
    /// must not be used again afterwards.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z16DS_DiscardStreamP14StreamingState"]
    pub fn DS_DiscardStream(aSctx: *mut StreamingState);
}
extern "C" {
    /// @brief Given audio, returns a vector suitable for input to a DeepSpeech
    /// model trained with the given parameters.
    ///
    /// Extracts MFCC features from the audio signal and adds the appropriate
    /// amount of context to run inference on a DeepSpeech model trained with
    /// the given parameters.
    ///
    /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
    /// rate.
    /// @param aBufferSize Sample-length of the audio signal.
    /// @param aSampleRate Sample rate of the audio signal.
    /// @param aNCep Number of cepstrum.
    /// @param aNContext Size of the context window.
    /// @param[out] aMfcc Receives an array containing features, of shape
    /// (@p aNFrames, ncep * ncontext). The user is responsible for freeing the
    /// array.
    /// @param[out] aNFrames (optional) Number of frames in @p aMfcc.
    /// @param[out] aFrameLen (optional) Length of each frame
    /// (ncep * ncontext) in @p aMfcc.
    // NOTE(review): "optional" presumably means NULL may be passed for
    // aNFrames / aFrameLen; confirm against the C header.
    // NOTE(review): no release function for the feature array is declared in
    // the visible bindings; confirm how @p aMfcc has to be freed.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z21DS_AudioToInputVectorPKsjjjjPPfPiS3_"]
    pub fn DS_AudioToInputVector(
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
        aSampleRate: ::std::os::raw::c_uint,
        aNCep: ::std::os::raw::c_uint,
        aNContext: ::std::os::raw::c_uint,
        aMfcc: *mut *mut f32,
        aNFrames: *mut ::std::os::raw::c_int,
        aFrameLen: *mut ::std::os::raw::c_int,
    );
}
extern "C" {
    /// @brief Prints the version of this library and of the linked TensorFlow
    /// library.
    // NOTE(review): the link name looks like an Itanium-mangled C++ symbol with
    // a Mach-O style leading underscore; confirm it resolves on every target.
    #[link_name = "\u{1}__Z16DS_PrintVersionsv"]
    pub fn DS_PrintVersions();
}