Skip to main content

qwen_asr/
lib.rs

1//! CPU-only Qwen3-ASR speech recognition in pure Rust.
2//!
3//! BLAS and SIMD optimizations are selected automatically at compile time based
4//! on the target platform — Accelerate + NEON on macOS/aarch64, OpenBLAS + AVX2
5//! on Linux/x86_64, etc. For best performance on x86_64, build with:
6//!
7//! ```bash
8//! RUSTFLAGS="-C target-cpu=native" cargo build --release
9//! ```
10//!
11//! **Important:** Always build in release mode (`--release`). Debug builds are
12//! 10–50x slower and unusable for real-time inference.
13//!
14//! # Quick Start
15//!
16//! ```rust,no_run
17//! use qwen_asr::context::QwenCtx;
18//! use qwen_asr::transcribe;
19//!
20//! let mut ctx = QwenCtx::load("qwen3-asr-0.6b").expect("model not found");
21//! let text = transcribe::transcribe(&mut ctx, "audio.wav").unwrap();
22//! println!("{text}");
23//! ```
24//!
25//! # Forced Alignment
26//!
27//! With the aligner model variant you can obtain word-level timestamps for a
28//! known transcript:
29//!
30//! ```rust,no_run
31//! use qwen_asr::context::QwenCtx;
32//! use qwen_asr::align;
33//!
34//! let mut ctx = QwenCtx::load("qwen3-aligner-0.6b").expect("aligner model not found");
35//! let samples: Vec<f32> = vec![]; // 16 kHz mono f32 PCM
36//! let results = align::forced_align(&mut ctx, &samples, "Hello world", "English").unwrap();
37//! for r in &results {
38//!     println!("{}: {:.0} – {:.0} ms", r.text, r.start_ms, r.end_ms);
39//! }
40//! ```
41//!
42//! # Module Guide
43//!
44//! | Module | Purpose |
45//! |--------|---------|
46//! | [`context`] | Engine state — start here with [`context::QwenCtx::load`] |
47//! | [`transcribe`] | Offline, segmented, and streaming transcription |
48//! | [`audio`] | WAV loading, resampling, mel spectrogram |
49//! | [`align`] | Forced alignment (word/character timestamps) |
50//! | [`config`] | Model configuration and variant detection |
51//! | [`tokenizer`] | GPT-2 byte-level BPE tokenizer |
52//!
53//! The remaining modules (`encoder`, `decoder`, `kernels`, `safetensors`) are
54//! implementation details and not intended for direct use.
55
56pub mod config;
57pub mod safetensors;
58pub mod audio;
59pub mod tokenizer;
60pub mod kernels;
61pub mod encoder;
62pub mod decoder;
63pub mod context;
64pub mod transcribe;
65pub mod align;
66#[cfg(any(feature = "ios", feature = "android", feature = "macos-ffi"))]
67pub mod c_api;
68#[cfg(feature = "android")]
69pub mod jni_api;
70
/// Reports which compile-time acceleration paths are active in this build.
///
/// Every entry is decided by `cfg!` at compile time, so the result is fixed
/// for a given binary. Labels are emitted in this order:
///
/// 1. Linear-algebra backend: `"vDSP/Accelerate"` when the `vdsp` feature is
///    on, otherwise `"BLAS"` when the `blas` feature is on.
/// 2. SIMD for the target architecture: on `aarch64`, `"NEON"` followed by
///    `"DotProd"` if that target feature is enabled; on `x86_64`, only the
///    widest enabled tier of `"AVX2"`, `"AVX"`, `"SSE4.1"`, followed by
///    `"FMA"` if enabled.
///
/// If nothing above applies, the result is the single entry `"generic"`.
pub fn optimization_flags() -> Vec<&'static str> {
    let mut enabled: Vec<&'static str> = Vec::new();

    // Backend: vDSP takes precedence, so "BLAS" is reported only without it.
    match (cfg!(feature = "vdsp"), cfg!(feature = "blas")) {
        (true, _) => enabled.push("vDSP/Accelerate"),
        (false, true) => enabled.push("BLAS"),
        (false, false) => {}
    }

    // Architecture-specific SIMD
    if cfg!(target_arch = "aarch64") {
        enabled.push("NEON");
        enabled.extend(cfg!(target_feature = "dotprod").then_some("DotProd"));
    } else if cfg!(target_arch = "x86_64") {
        // Report only the widest vector tier that is enabled.
        let widest_tier = if cfg!(target_feature = "avx2") {
            Some("AVX2")
        } else if cfg!(target_feature = "avx") {
            Some("AVX")
        } else if cfg!(target_feature = "sse4.1") {
            Some("SSE4.1")
        } else {
            None
        };
        enabled.extend(widest_tier);
        // FMA is reported independently of the vector tier.
        enabled.extend(cfg!(target_feature = "fma").then_some("FMA"));
    }

    if enabled.is_empty() {
        vec!["generic"]
    } else {
        enabled
    }
}