#[cfg(windows)]
use libloading::os::windows::Library as WinLibrary;
use libloading::Library;
use std::ffi::{c_char, c_float, c_int, CStr, CString};
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
/// Opaque handle to a native whisper context: produced by the `whisper_init_from_file`
/// binding and passed back to the other whisper entry points held by `WhisperLibrary`.
type WhisperContext = *mut std::ffi::c_void;
// The callback slots of `WhisperFullParams` are declared as untyped pointers.
// Nothing in the visible code assigns or invokes them; each alias only has to
// occupy one pointer-sized field of the `#[repr(C)]` parameter struct.
type WhisperNewSegmentCallback = *const std::ffi::c_void;
type WhisperProgressCallback = *const std::ffi::c_void;
type WhisperEncoderBeginCallback = *const std::ffi::c_void;
type WhisperAbortCallback = *const std::ffi::c_void;
type WhisperLogitsFilterCallback = *const std::ffi::c_void;
// Element type behind `WhisperFullParams::grammar_rules`, which is declared as
// `*const WhisperGrammarElement`, i.e. a pointer to pointers.
// NOTE(review): presumably matches `const whisper_grammar_element **` in whisper.h — confirm.
type WhisperGrammarElement = *const std::ffi::c_void;
/// Voice-activity-detection settings, embedded by value in `WhisperFullParams::vad_params`.
///
/// `#[repr(C)]` fixes field order and layout because the struct crosses the FFI boundary
/// as part of `WhisperFullParams`; do not reorder, add or remove fields casually.
/// NOTE(review): presumably mirrors `whisper_vad_params` from whisper.h — confirm the field
/// list against the header of the library version that is actually shipped.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct WhisperVadParams {
pub threshold: c_float,
pub min_speech_duration_ms: c_int,
pub min_silence_duration_ms: c_int,
pub max_speech_duration_s: c_float,
pub speech_pad_ms: c_int,
pub samples_overlap: c_float,
}
/// Parameter block exchanged by value with the native library: it is the return type of
/// the `whisper_full_default_params` binding and the second argument of `whisper_full`.
///
/// Because the struct is passed by value across FFI, `#[repr(C)]` field order, field
/// types and total size must match the C definition used by the loaded library exactly.
/// NOTE(review): presumably mirrors `whisper_full_params` from whisper.h — re-check every
/// field whenever the bundled whisper library is upgraded.
///
/// The type is `Clone` but not `Copy`; `Context::full` clones it for each call.
#[repr(C)]
#[derive(Clone)]
pub struct WhisperFullParams {
// Filled from a `WhisperSamplingStrategy` discriminant by the native defaults function.
pub strategy: c_int,
pub n_threads: c_int,
pub n_max_text_ctx: c_int,
pub offset_ms: c_int,
pub duration_ms: c_int,
pub translate: bool,
pub no_context: bool,
pub no_timestamps: bool,
pub single_segment: bool,
pub print_special: bool,
pub print_progress: bool,
pub print_realtime: bool,
pub print_timestamps: bool,
pub token_timestamps: bool,
pub thold_pt: c_float,
pub thold_ptsum: c_float,
pub max_len: c_int,
pub split_on_word: bool,
pub max_tokens: c_int,
pub debug_mode: bool,
pub audio_ctx: c_int,
pub tdrz_enable: bool,
// Raw C strings / arrays: whoever sets these must keep the pointed-to memory alive
// for as long as the native call that receives this struct is running.
pub suppress_regex: *const c_char,
pub initial_prompt: *const c_char,
pub carry_initial_prompt: bool,
pub prompt_tokens: *const c_int,
pub prompt_n_tokens: c_int,
pub language: *const c_char,
pub detect_language: bool,
pub suppress_blank: bool,
pub suppress_nst: bool,
pub temperature: c_float,
pub max_initial_ts: c_float,
pub length_penalty: c_float,
pub temperature_inc: c_float,
pub entropy_thold: c_float,
pub logprob_thold: c_float,
pub no_speech_thold: c_float,
// NOTE(review): in whisper.h these three presumably live in nested `greedy` /
// `beam_search` sub-structs; flattening keeps the same layout only if those
// sub-structs contain exactly these fields — confirm.
pub greedy_best_of: c_int,
pub beam_search_beam_size: c_int,
pub beam_search_patience: c_float,
// Callback / user-data pairs, typed as opaque pointers. The visible code never
// writes to them, so they keep whatever the native defaults function returned.
pub new_segment_callback: WhisperNewSegmentCallback,
pub new_segment_callback_user_data: *mut std::ffi::c_void,
pub progress_callback: WhisperProgressCallback,
pub progress_callback_user_data: *mut std::ffi::c_void,
pub encoder_begin_callback: WhisperEncoderBeginCallback,
pub encoder_begin_callback_user_data: *mut std::ffi::c_void,
pub abort_callback: WhisperAbortCallback,
pub abort_callback_user_data: *mut std::ffi::c_void,
pub logits_filter_callback: WhisperLogitsFilterCallback,
pub logits_filter_callback_user_data: *mut std::ffi::c_void,
pub grammar_rules: *const WhisperGrammarElement,
pub n_grammar_rules: usize,
pub i_start_rule: usize,
pub grammar_penalty: c_float,
pub vad: bool,
pub vad_model_path: *const c_char,
pub vad_params: WhisperVadParams,
}
impl WhisperFullParams {
    /// Tunes the parameters for decoding one short clip as a single segment.
    ///
    /// * `n_samples` – number of audio samples in the clip; used to size `audio_ctx`.
    /// * `duration_ms` – length of the clip in milliseconds; 100 ms of slack is added.
    ///
    /// Context carry-over, timestamps and all console printing are switched off, the
    /// output is limited to 16 tokens, and temperature fallback is disabled
    /// (`temperature_inc = 0.0`).
    #[allow(dead_code)]
    pub fn configure_for_short_audio(&mut self, n_samples: usize, duration_ms: c_int) {
        self.no_context = true;
        self.single_segment = true;
        self.suppress_blank = true;
        self.no_timestamps = true;

        self.print_special = false;
        self.print_progress = false;
        self.print_realtime = false;
        self.print_timestamps = false;

        // Saturate instead of overflowing: `duration_ms + 100` would panic in debug
        // builds and wrap to a negative duration in release builds near `c_int::MAX`.
        self.duration_ms = duration_ms.saturating_add(100);

        self.logprob_thold = -2.0;
        self.entropy_thold = 0.0;
        self.temperature_inc = 0.0;
        self.max_tokens = 16;

        // One frame per 160 samples (plus one), capped at 1500.
        // NOTE(review): presumably whisper's 160-sample hop and 1500-frame encoder
        // window — confirm. The cap also guarantees the narrowing cast cannot truncate.
        let mel_frames = (n_samples / 160 + 1).min(1500) as c_int;
        self.audio_ctx = mel_frames;
    }

    /// Configures multi-segment decoding with the fallback thresholds enabled.
    ///
    /// Sets `entropy_thold = 2.4`, `logprob_thold = -0.8`, `no_speech_thold = 0.6`,
    /// starts at temperature `0.0` with increments of `0.2`, enables `suppress_blank`
    /// and `suppress_nst`, disables context carry-over and console printing, and
    /// clears the `duration_ms` / `max_tokens` limits (both set to `0`).
    /// As the name indicates, the values are meant to reduce hallucinated output; how
    /// each threshold is applied is defined by the native library.
    pub fn configure_with_hallucination_mitigation(&mut self) {
        self.no_context = true;
        self.single_segment = false;
        self.suppress_blank = true;
        self.suppress_nst = true;
        self.no_timestamps = false;

        self.print_special = false;
        self.print_progress = false;
        self.print_realtime = false;
        self.print_timestamps = false;

        self.duration_ms = 0;
        self.max_tokens = 0;

        self.entropy_thold = 2.4;
        self.logprob_thold = -0.8;
        self.no_speech_thold = 0.6;

        self.temperature = 0.0;
        self.temperature_inc = 0.2;
        self.length_penalty = 1.0;
    }
}
/// Decoding strategy selector. `full_default_params` converts it with `as c_int`, so the
/// explicit discriminants are the values the native library receives.
/// NOTE(review): presumably mirrors `enum whisper_sampling_strategy` in whisper.h — confirm.
#[repr(C)]
#[allow(dead_code)]
pub enum WhisperSamplingStrategy {
Greedy = 0,
BeamSearch = 1,
}
/// Process-wide slot for the whisper bindings. Filled at most once by `init_library`;
/// holds `None` when every load attempt failed.
static WHISPER_LIB: OnceLock<Option<WhisperLibrary>> = OnceLock::new();
/// Process-wide slot for the ggml bindings (not compiled on macOS). Filled at most once
/// by `init_ggml_backends`; holds `None` when ggml could not be loaded.
#[cfg(not(target_os = "macos"))]
static GGML_LIB: OnceLock<Option<GgmlLibrary>> = OnceLock::new();
/// Opaque backend-registration pointer: the value returned by `ggml_backend_cuda_reg`
/// and accepted by `ggml_backend_register`.
#[cfg(not(target_os = "macos"))]
type GgmlBackendReg = *mut std::ffi::c_void;
/// The dynamically loaded ggml library together with the entry points resolved from it.
/// Not compiled on macOS.
#[cfg(not(target_os = "macos"))]
#[allow(dead_code)]
struct GgmlLibrary {
// Owns the loaded library. The function pointers below were copied out of it and
// are only meaningful while it stays loaded, so it must live as long as they do.
_lib: Library,
// Bound to the `ggml_backend_load_all_from_path` export.
backend_load_all_from_path: unsafe extern "C" fn(dir_path: *const c_char),
// Bound to the `ggml_backend_register` export.
backend_register: unsafe extern "C" fn(reg: GgmlBackendReg),
}
// SAFETY(review): the struct holds only a library handle and plain function pointers.
// Sharing it across threads is sound only if the native ggml functions may be called
// from any thread — the visible code does not establish that; confirm.
#[cfg(not(target_os = "macos"))]
unsafe impl Send for GgmlLibrary {}
#[cfg(not(target_os = "macos"))]
unsafe impl Sync for GgmlLibrary {}
#[cfg(not(target_os = "macos"))]
impl GgmlLibrary {
    /// Opens the ggml shared library at `path` and resolves the backend entry points.
    ///
    /// # Errors
    /// Returns a human-readable message when the library cannot be opened or when
    /// either `ggml_backend_load_all_from_path` or `ggml_backend_register` is missing.
    fn load<P: AsRef<Path>>(path: P) -> Result<Self, String> {
        let target = path.as_ref();

        // NOTE(review): 0x100 | 0x1000 look like LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR |
        // LOAD_LIBRARY_SEARCH_DEFAULT_DIRS — confirm against the LoadLibraryEx docs.
        // SAFETY(review): loading a library runs its initialisers; the caller is
        // trusted to point at a genuine ggml build.
        #[cfg(windows)]
        let lib: Library = unsafe { WinLibrary::load_with_flags(target, 0x100 | 0x1000) }
            .map(Library::from)
            .map_err(|e| format!("Failed to load ggml library: {}", e))?;
        #[cfg(not(windows))]
        let lib: Library = unsafe { Library::new(target) }
            .map_err(|e| format!("Failed to load ggml library: {}", e))?;

        // SAFETY(review): the declared signatures are assumed to match the exports of
        // the loaded ggml build. The pointers are copied out of their `Symbol`
        // wrappers, so `lib` is stored alongside them to keep them valid.
        let backend_load_all_from_path = unsafe {
            lib.get::<unsafe extern "C" fn(*const c_char)>(b"ggml_backend_load_all_from_path\0")
        }
        .map(|sym| *sym)
        .map_err(|e| format!("Failed to load ggml_backend_load_all_from_path: {}", e))?;

        let backend_register =
            unsafe { lib.get::<unsafe extern "C" fn(GgmlBackendReg)>(b"ggml_backend_register\0") }
                .map(|sym| *sym)
                .map_err(|e| format!("Failed to load ggml_backend_register: {}", e))?;

        Ok(Self {
            _lib: lib,
            backend_load_all_from_path,
            backend_register,
        })
    }

    /// Asks ggml to load every backend it finds in `dir_path`.
    ///
    /// The path is converted lossily to UTF-8; if it contains an interior NUL byte an
    /// empty string is passed instead.
    fn load_backends_from_path(&self, dir_path: &Path) {
        let lossy = dir_path.to_string_lossy();
        let c_dir = CString::new(lossy.as_bytes()).unwrap_or_default();
        // SAFETY: `c_dir` is a valid NUL-terminated string that outlives the call.
        unsafe { (self.backend_load_all_from_path)(c_dir.as_ptr()) }
    }

    /// Hands an already obtained backend registration pointer to ggml.
    #[allow(dead_code)]
    fn register_backend(&self, reg: GgmlBackendReg) {
        // SAFETY(review): `reg` must be a pointer ggml accepts; callers obtain it from
        // a backend's own `*_reg` export.
        unsafe { (self.backend_register)(reg) }
    }
}
/// Best-effort registration of the CUDA backend shipped as `ggml-cuda.dll` in `lib_dir`.
///
/// Every failure (DLL absent, load error, missing export, null registration pointer)
/// is logged and otherwise ignored. On success the DLL handle is deliberately leaked
/// so the registered backend stays loaded for the rest of the process.
#[cfg(windows)]
#[allow(dead_code)]
fn try_load_cuda_backend(ggml_lib: &GgmlLibrary, lib_dir: &Path) {
    let dll = lib_dir.join("ggml-cuda.dll");
    if !dll.exists() {
        tracing::debug!("ggml-cuda.dll not found at {}", dll.display());
        return;
    }
    tracing::info!("Found ggml-cuda.dll, attempting to load CUDA backend...");

    // SAFETY(review): loading runs the DLL's initialisers; the file is trusted because
    // it sits next to the ggml library that was just loaded.
    let handle = match unsafe { Library::new(&dll) } {
        Ok(handle) => handle,
        Err(e) => {
            tracing::warn!("Failed to load ggml-cuda.dll: {}", e);
            return;
        }
    };

    type CudaRegFn = unsafe extern "C" fn() -> GgmlBackendReg;
    // SAFETY(review): the signature is assumed to match the DLL's export.
    let lookup = unsafe { handle.get::<CudaRegFn>(b"ggml_backend_cuda_reg\0") }.map(|sym| *sym);
    let reg_fn = match lookup {
        Ok(reg_fn) => reg_fn,
        Err(e) => {
            tracing::warn!("Failed to find ggml_backend_cuda_reg: {}", e);
            return;
        }
    };

    // SAFETY: `handle` is still alive, so the copied function pointer is valid.
    let reg = unsafe { reg_fn() };
    if reg.is_null() {
        tracing::warn!("ggml_backend_cuda_reg returned null");
        return;
    }

    tracing::info!("Registering CUDA backend...");
    ggml_lib.register_backend(reg);
    tracing::info!("CUDA backend registered successfully");

    // Leak the handle on purpose: dropping it would unload the DLL that now backs a
    // registered backend.
    std::mem::forget(handle);
}
/// Stub of the Windows-only CUDA loader for targets that are neither Windows nor macOS.
/// It does nothing and keeps the same signature as the Windows implementation.
#[cfg(all(not(windows), not(target_os = "macos")))]
#[allow(dead_code)]
fn try_load_cuda_backend(_ggml_lib: &GgmlLibrary, _lib_dir: &Path) {
// Intentionally empty.
}
/// The dynamically loaded whisper library together with the entry points this file uses.
///
/// Each function-pointer field is bound by `WhisperLibrary::load` to the export named
/// `whisper_<field name>` (for example `full` is bound to `whisper_full`).
pub struct WhisperLibrary {
// Owns the loaded library. The function pointers below were copied out of it and
// are only meaningful while it stays loaded.
_lib: Library,
init_from_file: unsafe extern "C" fn(path_model: *const c_char) -> WhisperContext,
free: unsafe extern "C" fn(ctx: WhisperContext),
// Returns the whole parameter struct by value, so `WhisperFullParams` must match
// the native layout exactly.
full_default_params: unsafe extern "C" fn(strategy: c_int) -> WhisperFullParams,
// Takes the parameter struct by value as well.
full: unsafe extern "C" fn(
ctx: WhisperContext,
params: WhisperFullParams,
samples: *const c_float,
n_samples: c_int,
) -> c_int,
full_n_segments: unsafe extern "C" fn(ctx: WhisperContext) -> c_int,
full_get_segment_text:
unsafe extern "C" fn(ctx: WhisperContext, i_segment: c_int) -> *const c_char,
print_system_info: unsafe extern "C" fn() -> *const c_char,
}
// SAFETY(review): the struct holds only a library handle and plain function pointers.
// Sharing it across threads is sound only if the native whisper functions may be called
// from any thread — the visible code does not establish that; confirm.
unsafe impl Send for WhisperLibrary {}
unsafe impl Sync for WhisperLibrary {}
impl WhisperLibrary {
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, String> {
unsafe {
#[cfg(windows)]
let lib: Library = {
WinLibrary::load_with_flags(path.as_ref(), 0x100 | 0x1000)
.map(|l| l.into())
.map_err(|e| format!("Failed to load whisper library: {}", e))?
};
#[cfg(not(windows))]
let lib: Library = Library::new(path.as_ref())
.map_err(|e| format!("Failed to load whisper library: {}", e))?;
let init_from_file = *lib
.get::<unsafe extern "C" fn(*const c_char) -> WhisperContext>(
b"whisper_init_from_file\0",
)
.map_err(|e| format!("Failed to load whisper_init_from_file: {}", e))?;
let free = *lib
.get::<unsafe extern "C" fn(WhisperContext)>(b"whisper_free\0")
.map_err(|e| format!("Failed to load whisper_free: {}", e))?;
let full_default_params = *lib
.get::<unsafe extern "C" fn(c_int) -> WhisperFullParams>(
b"whisper_full_default_params\0",
)
.map_err(|e| format!("Failed to load whisper_full_default_params: {}", e))?;
let full = *lib
.get::<unsafe extern "C" fn(
WhisperContext,
WhisperFullParams,
*const c_float,
c_int,
) -> c_int>(b"whisper_full\0")
.map_err(|e| format!("Failed to load whisper_full: {}", e))?;
let full_n_segments = *lib
.get::<unsafe extern "C" fn(WhisperContext) -> c_int>(b"whisper_full_n_segments\0")
.map_err(|e| format!("Failed to load whisper_full_n_segments: {}", e))?;
let full_get_segment_text = *lib
.get::<unsafe extern "C" fn(WhisperContext, c_int) -> *const c_char>(
b"whisper_full_get_segment_text\0",
)
.map_err(|e| format!("Failed to load whisper_full_get_segment_text: {}", e))?;
let print_system_info = *lib
.get::<unsafe extern "C" fn() -> *const c_char>(b"whisper_print_system_info\0")
.map_err(|e| format!("Failed to load whisper_print_system_info: {}", e))?;
Ok(Self {
_lib: lib,
init_from_file,
free,
full_default_params,
full,
full_n_segments,
full_get_segment_text,
print_system_info,
})
}
}
}
/// Registers `dir` with the Windows loader as an additional DLL search directory.
///
/// `AddDllDirectory` is given a non-verbatim path, so a leading `\\?\` is removed first:
/// `\\?\C:\app` becomes `C:\app` and `\\?\UNC\server\share` becomes `\\server\share`.
/// The conversion works on the UTF-16 form of the path, so no characters are lost.
///
/// Failure is logged and otherwise ignored. The cookie returned by the loader is not
/// kept, which means the directory stays registered for the rest of the process.
#[cfg(windows)]
fn add_dll_directory(dir: &Path) {
    use std::os::windows::ffi::OsStrExt;
    #[link(name = "kernel32")]
    extern "system" {
        fn AddDllDirectory(path: *const u16) -> *mut std::ffi::c_void;
    }

    const VERBATIM: &[u16] = &[b'\\' as u16, b'\\' as u16, b'?' as u16, b'\\' as u16];
    const UNC: &[u16] = &[b'U' as u16, b'N' as u16, b'C' as u16, b'\\' as u16];
    const SHARE_ROOT: &[u16] = &[b'\\' as u16, b'\\' as u16];
    const NO_PREFIX: &[u16] = &[];

    let raw: Vec<u16> = dir.as_os_str().encode_wide().collect();

    // Split into "what to put in front" and "what to keep". Just cutting `\\?\` off a
    // verbatim UNC path would leave the relative path `UNC\server\share`, so that form
    // is mapped back to `\\server\share`.
    let (lead, tail): (&[u16], &[u16]) = match raw.strip_prefix(VERBATIM) {
        Some(rest) => match rest.strip_prefix(UNC) {
            Some(share) => (SHARE_ROOT, share),
            None => (NO_PREFIX, rest),
        },
        None => (NO_PREFIX, raw.as_slice()),
    };

    let wide: Vec<u16> = lead
        .iter()
        .chain(tail.iter())
        .copied()
        .chain(std::iter::once(0))
        .collect();

    // SAFETY: `wide` is a NUL-terminated UTF-16 buffer that outlives the call.
    let cookie = unsafe { AddDllDirectory(wide.as_ptr()) };
    if cookie.is_null() {
        tracing::warn!("Failed to add DLL search directory: {}", dir.display());
    } else {
        tracing::debug!("Added DLL search directory: {}", dir.display());
    }
}
/// Loads the ggml library once per process and asks it to load its backends.
///
/// Candidate files are tried in this order: next to the executable, in the current
/// directory, in the locations from `resource_dir_paths`, and finally the bare file
/// name. The first existing candidate that loads wins, and backends are then loaded
/// from its directory. If none works, the bare library name is handed to the system
/// loader and backends are loaded from the current directory. Failure is only logged;
/// the outcome is cached in `GGML_LIB`.
///
/// On macOS the body is compiled out and the function does nothing.
fn init_ggml_backends() {
    #[cfg(not(target_os = "macos"))]
    {
        GGML_LIB.get_or_init(|| {
            let lib_name = if cfg!(windows) {
                "ggml.dll"
            } else {
                "libggml.so"
            };

            // Start with the executable's directory (when it can be determined).
            let mut candidates: Vec<PathBuf> = std::env::current_exe()
                .ok()
                .and_then(|exe| exe.parent().map(|dir| dir.join(lib_name)))
                .into_iter()
                .collect();
            candidates.push(std::env::current_dir().unwrap_or_default().join(lib_name));
            candidates.extend(resource_dir_paths(lib_name));
            candidates.push(PathBuf::from(lib_name));

            for candidate in candidates.iter().filter(|p| p.exists()) {
                let parent_dir = candidate.parent();

                // Lets the loader resolve sibling DLLs of the candidate.
                #[cfg(windows)]
                if let Some(dir) = parent_dir {
                    add_dll_directory(dir);
                }

                let lib = match GgmlLibrary::load(candidate) {
                    Ok(lib) => lib,
                    Err(e) => {
                        tracing::warn!(
                            "Failed to load ggml library from {}: {}",
                            candidate.display(),
                            e
                        );
                        continue;
                    }
                };

                tracing::info!("Loaded ggml library from: {}", candidate.display());
                if let Some(dir) = parent_dir {
                    tracing::info!("Loading ggml backends from: {}", dir.display());
                    lib.load_backends_from_path(dir);
                    #[cfg(windows)]
                    try_load_cuda_backend(&lib, dir);
                    tracing::info!("ggml backends loaded");
                }
                return Some(lib);
            }

            // Last resort: let the system loader resolve the bare name.
            match GgmlLibrary::load(lib_name) {
                Ok(lib) => {
                    tracing::info!("Loaded ggml library from system path");
                    let cwd = std::env::current_dir().unwrap_or_default();
                    lib.load_backends_from_path(&cwd);
                    Some(lib)
                }
                Err(e) => {
                    tracing::warn!(
                        "Failed to load ggml library: {} - GPU backends may not be available",
                        e
                    );
                    None
                }
            }
        });
    }
}
/// Initialises the ggml backends and loads the whisper library once per process.
///
/// Candidate files are tried in this order: next to the executable, in the current
/// directory, in the locations from `resource_dir_paths`, the bare file name, and on
/// macOS three bundle-relative directories. The first existing candidate that loads
/// wins; otherwise the bare name is handed to the system loader. The outcome is cached
/// in `WHISPER_LIB`, so later calls return the cached result without retrying.
///
/// # Errors
/// Returns `"Whisper library not available"` when no attempt succeeded.
pub fn init_library() -> Result<(), String> {
    init_ggml_backends();

    let loaded = WHISPER_LIB.get_or_init(|| {
        let lib_name = if cfg!(windows) {
            "whisper.dll"
        } else if cfg!(target_os = "macos") {
            "libwhisper.dylib"
        } else {
            "libwhisper.so"
        };

        // Start with the executable's directory (when it can be determined).
        let mut candidates: Vec<PathBuf> = std::env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(|dir| dir.join(lib_name)))
            .into_iter()
            .collect();
        candidates.push(std::env::current_dir().unwrap_or_default().join(lib_name));
        candidates.extend(resource_dir_paths(lib_name));
        candidates.push(PathBuf::from(lib_name));

        // Bundle-relative locations, tried after everything else.
        #[cfg(target_os = "macos")]
        if let Some(exe_dir) = std::env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(Path::to_path_buf))
        {
            for rel in ["../Resources/Frameworks", "../Resources", "../Frameworks"] {
                candidates.push(exe_dir.join(rel).join(lib_name));
            }
        }

        for candidate in candidates.iter().filter(|p| p.exists()) {
            // Lets the loader resolve sibling DLLs of the candidate.
            #[cfg(windows)]
            if let Some(lib_dir) = candidate.parent() {
                add_dll_directory(lib_dir);
            }

            match WhisperLibrary::load(candidate) {
                Ok(lib) => {
                    tracing::info!("Loaded whisper library from: {}", candidate.display());
                    return Some(lib);
                }
                Err(e) => tracing::warn!(
                    "Failed to load whisper library from {}: {}",
                    candidate.display(),
                    e
                ),
            }
        }

        // Last resort: let the system loader resolve the bare name.
        match WhisperLibrary::load(lib_name) {
            Ok(lib) => {
                tracing::info!("Loaded whisper library from system path");
                Some(lib)
            }
            Err(e) => {
                tracing::error!("Failed to load whisper library: {}", e);
                None
            }
        }
    });

    match loaded {
        Some(_) => Ok(()),
        None => Err("Whisper library not available".to_string()),
    }
}
/// Builds the list of extra candidate locations for the shared library `lib_name`.
///
/// Order of the returned paths:
/// 1. Windows only: `<exe dir>/cuda/<lib>` and `<exe dir>/cpu/<lib>`.
/// 2. When the `VTX_RESOURCE_DIR` environment variable is set and non-empty:
///    on Windows `<dir>/binaries/cuda/<lib>` and `<dir>/binaries/cpu/<lib>`, then on
///    every platform `<dir>/<lib>` and `<dir>/binaries/<lib>`.
///
/// The paths are not checked for existence; that is left to the caller.
fn resource_dir_paths(lib_name: &str) -> Vec<PathBuf> {
    let mut paths = Vec::new();

    #[cfg(windows)]
    if let Ok(exe_path) = std::env::current_exe() {
        if let Some(exe_dir) = exe_path.parent() {
            paths.push(exe_dir.join("cuda").join(lib_name));
            paths.push(exe_dir.join("cpu").join(lib_name));
        }
    }

    // `var_os` rather than `var`: a directory whose name is not valid Unicode is still
    // a usable path and must not be silently ignored.
    let resource_dir = std::env::var_os("VTX_RESOURCE_DIR").filter(|dir| !dir.is_empty());
    if let Some(resource_dir) = resource_dir {
        let base = PathBuf::from(resource_dir);
        #[cfg(windows)]
        {
            paths.push(base.join("binaries").join("cuda").join(lib_name));
            paths.push(base.join("binaries").join("cpu").join(lib_name));
        }
        paths.push(base.join(lib_name));
        paths.push(base.join("binaries").join(lib_name));
    }

    paths
}
/// Returns the process-wide whisper bindings.
///
/// # Errors
/// Fails with `"Whisper library not loaded"` when `WHISPER_LIB` has not been
/// initialised yet or when initialisation found no usable library.
fn get_lib() -> Result<&'static WhisperLibrary, String> {
    match WHISPER_LIB.get() {
        Some(Some(lib)) => Ok(lib),
        _ => Err("Whisper library not loaded".to_string()),
    }
}
/// Owning wrapper around a native whisper context. The pointer is obtained in
/// `Context::new` (never null on success) and released in the `Drop` impl.
pub struct Context {
ptr: WhisperContext,
}
// SAFETY(review): allows moving a context to another thread. `Sync` is not implemented,
// so shared references cannot cross threads. Sound only if the native context has no
// thread affinity — the visible code does not establish that; confirm.
unsafe impl Send for Context {}
impl Context {
    /// Creates a whisper context from the model file at `model_path`.
    ///
    /// # Errors
    /// Fails when the whisper library is not loaded, when the path is not valid UTF-8
    /// or contains a NUL byte, or when the native initialiser returns null.
    pub fn new<P: AsRef<Path>>(model_path: P) -> Result<Self, String> {
        let lib = get_lib()?;
        let path_str = model_path.as_ref().to_str().ok_or("Invalid model path")?;
        let c_path = CString::new(path_str).map_err(|e| format!("Invalid path: {}", e))?;
        // SAFETY: `c_path` is a valid NUL-terminated string that outlives the call.
        let ptr = unsafe { (lib.init_from_file)(c_path.as_ptr()) };
        if ptr.is_null() {
            return Err(format!(
                "Failed to initialize whisper context from: {}",
                path_str
            ));
        }
        Ok(Self { ptr })
    }

    /// Runs a full transcription of `samples` with a copy of `params`.
    ///
    /// # Errors
    /// Fails when the whisper library is not loaded, when `samples` holds more
    /// elements than a `c_int` can count, or when the native call returns non-zero.
    pub fn full(&self, params: &WhisperFullParams, samples: &[f32]) -> Result<(), String> {
        let lib = get_lib()?;

        // The native API takes the length as `c_int`. A plain `as` cast would silently
        // truncate and make the library read a wrong (possibly negative) count.
        if samples.len() > c_int::MAX as usize {
            return Err(format!(
                "Too many samples for a single transcription call: {} (maximum is {})",
                samples.len(),
                c_int::MAX
            ));
        }
        let n_samples = samples.len() as c_int;

        // SAFETY: `self.ptr` came from `init_from_file` and is non-null; the slice
        // pointer is valid for `n_samples` reads for the duration of the call.
        let result = unsafe { (lib.full)(self.ptr, params.clone(), samples.as_ptr(), n_samples) };
        if result != 0 {
            return Err(format!("Transcription failed with code: {}", result));
        }
        Ok(())
    }

    /// Returns the segment count reported by the native library for this context.
    ///
    /// # Errors
    /// Fails when the whisper library is not loaded.
    pub fn full_n_segments(&self) -> Result<i32, String> {
        let lib = get_lib()?;
        // SAFETY: `self.ptr` is a live context owned by `self`.
        Ok(unsafe { (lib.full_n_segments)(self.ptr) })
    }

    /// Returns the text of segment `i_segment` as an owned `String`.
    ///
    /// # Errors
    /// Fails when the whisper library is not loaded, when `i_segment` is outside
    /// `0..full_n_segments()`, when the native call returns null, or when the text is
    /// not valid UTF-8.
    pub fn full_get_segment_text(&self, i_segment: i32) -> Result<String, String> {
        let lib = get_lib()?;

        // Validate the index here so an out-of-range value never reaches native code.
        // SAFETY: `self.ptr` is a live context owned by `self`.
        let n_segments = unsafe { (lib.full_n_segments)(self.ptr) };
        if i_segment < 0 || i_segment >= n_segments {
            return Err(format!(
                "Segment index {} out of range (context has {} segments)",
                i_segment, n_segments
            ));
        }

        // SAFETY: live context and an index that was just checked.
        let ptr = unsafe { (lib.full_get_segment_text)(self.ptr, i_segment) };
        if ptr.is_null() {
            return Err(format!("Failed to get segment {} text", i_segment));
        }
        // SAFETY(review): the native library is assumed to return a NUL-terminated
        // string that stays valid until the context is next modified; it is copied
        // into an owned `String` right away.
        let c_str = unsafe { CStr::from_ptr(ptr) };
        c_str
            .to_str()
            .map(str::to_owned)
            .map_err(|e| format!("Invalid UTF-8 in segment: {}", e))
    }
}
impl Drop for Context {
    /// Releases the native context. If the whisper bindings are unavailable the
    /// pointer is left untouched, i.e. the native context is not freed.
    fn drop(&mut self) {
        let Ok(lib) = get_lib() else {
            return;
        };
        // SAFETY: `self.ptr` was produced by `init_from_file` and is freed only here.
        unsafe { (lib.free)(self.ptr) }
    }
}
/// Asks the native library for its default parameter set for `strategy`.
///
/// # Errors
/// Fails when the whisper library is not loaded.
pub fn full_default_params(strategy: WhisperSamplingStrategy) -> Result<WhisperFullParams, String> {
    let strategy_id = strategy as c_int;
    // SAFETY(review): relies on `WhisperFullParams` matching the native struct layout,
    // because the value is returned by value across the FFI boundary.
    get_lib().map(|lib| unsafe { (lib.full_default_params)(strategy_id) })
}
/// Returns the system-information string reported by the native library.
///
/// # Errors
/// Fails when the whisper library is not loaded, when the native call returns null,
/// or when the returned text is not valid UTF-8.
pub fn get_system_info() -> Result<String, String> {
    let lib = get_lib()?;

    // SAFETY: the export takes no arguments.
    let raw = unsafe { (lib.print_system_info)() };
    if raw.is_null() {
        return Err("Failed to get system info".to_string());
    }

    // SAFETY(review): the native library is assumed to return a NUL-terminated string
    // that is valid at least until the next call; it is copied immediately.
    match unsafe { CStr::from_ptr(raw) }.to_str() {
        Ok(text) => Ok(text.to_owned()),
        Err(e) => Err(format!("Invalid UTF-8 in system info: {}", e)),
    }
}