yamabiko_whisper/lib.rs
1//! Streaming speech recognition on top of `whisper-rs`, using the
2//! LocalAgreement-2 policy from Macháček et al. 2023.
3//!
4//! `whisper-rs` is fully encapsulated: callers do not need to add it
5//! to their own `Cargo.toml` or import any of its types. Build a
6//! processor with [`OnlineAsrModel::create_processor`] or
7//! [`OnlineAsrProcessor::from_model_path`], feed it
8//! 16 kHz mono f32 PCM via [`OnlineAsrProcessor::insert_audio_chunk`],
9//! or let [`AsrPipeline`] handle downmixing, resampling, and chunking
10//! for a microphone/file/network source.
11//!
12//! Accelerated whisper.cpp backends are exposed as Cargo features
13//! (`cuda`, `vulkan`, `metal`, `coreml`, `hipblas`, `intel-sycl`,
14//! `openblas`, `openmp`). Use [`BackendConfig`] when loading a model
15//! to select a GPU device or force CPU execution.
16
17mod audio;
18mod error;
19mod hypothesis_buffer;
20mod online_asr;
21
22pub use audio::{AsrPipeline, AudioInputConfig, AudioSample, LinearResampler, downmix_interleaved};
23pub use error::Error;
24pub use hypothesis_buffer::Word;
25pub use online_asr::{
26 BackendConfig, DecodingStrategy, OnlineAsrConfig, OnlineAsrModel, OnlineAsrProcessor,
27 ProcessOutput, SAMPLE_RATE, VadConfig, VadModel,
28};
29
/// Forward whisper.cpp / GGML / VAD logs to a `log` / `tracing`
/// backend.
///
/// Without calling this, those logs are silently dropped, which is
/// usually what you want. Call it once at startup if you do want to
/// see them.
///
/// This is a thin wrapper around `whisper_rs::install_logging_hooks`,
/// provided so callers do not need to depend on `whisper-rs` directly.
/// It takes no arguments and returns nothing.
pub fn install_log_hooks() {
    // NOTE(review): which backend actually receives the messages is
    // presumably decided by the Cargo features enabled on `whisper-rs`
    // — confirm against this crate's `Cargo.toml`.
    whisper_rs::install_logging_hooks();
}