qwen_asr/lib.rs
1//! CPU-only Qwen3-ASR speech recognition in pure Rust.
2//!
3//! BLAS and SIMD optimizations are selected automatically at compile time based
4//! on the target platform — Accelerate + NEON on macOS/aarch64, OpenBLAS + AVX2
5//! on Linux/x86_64, etc. For best performance on x86_64, build with:
6//!
7//! ```bash
8//! RUSTFLAGS="-C target-cpu=native" cargo build --release
9//! ```
10//!
11//! **Important:** Always build in release mode (`--release`). Debug builds are
12//! 10–50x slower and unusable for real-time inference.
13//!
14//! # Quick Start
15//!
16//! ```rust,no_run
17//! use qwen_asr::context::QwenCtx;
18//! use qwen_asr::transcribe;
19//!
20//! let mut ctx = QwenCtx::load("qwen3-asr-0.6b").expect("model not found");
21//! let text = transcribe::transcribe(&mut ctx, "audio.wav").unwrap();
22//! println!("{text}");
23//! ```
24//!
25//! # Forced Alignment
26//!
27//! With the aligner model variant you can obtain word-level timestamps for a
28//! known transcript:
29//!
30//! ```rust,no_run
31//! use qwen_asr::context::QwenCtx;
32//! use qwen_asr::align;
33//!
34//! let mut ctx = QwenCtx::load("qwen3-aligner-0.6b").expect("aligner model not found");
35//! let samples: Vec<f32> = vec![]; // 16 kHz mono f32 PCM
36//! let results = align::forced_align(&mut ctx, &samples, "Hello world", "English").unwrap();
37//! for r in &results {
38//! println!("{}: {:.0} – {:.0} ms", r.text, r.start_ms, r.end_ms);
39//! }
40//! ```
41//!
42//! # Module Guide
43//!
44//! | Module | Purpose |
45//! |--------|---------|
46//! | [`context`] | Engine state — start here with [`context::QwenCtx::load`] |
47//! | [`transcribe`] | Offline, segmented, and streaming transcription |
48//! | [`audio`] | WAV loading, resampling, mel spectrogram |
49//! | [`align`] | Forced alignment (word/character timestamps) |
50//! | [`config`] | Model configuration and variant detection |
51//! | [`tokenizer`] | GPT-2 byte-level BPE tokenizer |
52//!
53//! The remaining modules (`encoder`, `decoder`, `kernels`, `safetensors`) are
54//! implementation details and not intended for direct use.
55
56pub mod config;
57pub mod safetensors;
58pub mod audio;
59pub mod tokenizer;
60pub mod kernels;
61pub mod encoder;
62pub mod decoder;
63pub mod context;
64pub mod transcribe;
65pub mod align;
66#[cfg(any(feature = "ios", feature = "android", feature = "macos-ffi"))]
67pub mod c_api;
68#[cfg(feature = "android")]
69pub mod jni_api;
70
/// Reports which compile-time optimizations this build was compiled with.
///
/// Entries appear in a fixed order. The linear-algebra backend comes first:
/// `"vDSP/Accelerate"` when the `vdsp` feature is on, otherwise `"BLAS"` when
/// the `blas` feature is on. The SIMD capabilities of the target architecture
/// follow:
///
/// - `aarch64`: always `"NEON"`, plus `"DotProd"` if the `dotprod` target
///   feature is enabled.
/// - `x86_64`: the first enabled of `"AVX2"`, `"AVX"`, `"SSE4.1"` (at most
///   one of them), plus `"FMA"` if the `fma` target feature is enabled.
///
/// When none of the above applies the result is the single entry `"generic"`,
/// so the returned vector is never empty.
pub fn optimization_flags() -> Vec<&'static str> {
    // Accelerate takes precedence: "BLAS" is only reported when `vdsp` is off.
    let backend = if cfg!(feature = "vdsp") {
        Some("vDSP/Accelerate")
    } else if cfg!(feature = "blas") {
        Some("BLAS")
    } else {
        None
    };
    let mut enabled: Vec<&'static str> = backend.into_iter().collect();

    if cfg!(target_arch = "aarch64") {
        enabled.push("NEON");
        if cfg!(target_feature = "dotprod") {
            enabled.push("DotProd");
        }
    } else if cfg!(target_arch = "x86_64") {
        // Only the widest enabled vector extension is listed, never several.
        let vector_ext = if cfg!(target_feature = "avx2") {
            Some("AVX2")
        } else if cfg!(target_feature = "avx") {
            Some("AVX")
        } else if cfg!(target_feature = "sse4.1") {
            Some("SSE4.1")
        } else {
            None
        };
        enabled.extend(vector_ext);

        // FMA is reported independently of the vector extension above.
        if cfg!(target_feature = "fma") {
            enabled.push("FMA");
        }
    }

    // Fallback so callers always have something to display.
    if enabled.is_empty() {
        return vec!["generic"];
    }
    enabled
}